1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45 
46 #include "trace.h"
47 #include "trace_output.h"
48 
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54 
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will lurk into the ring-buffer to count the
58  * entries inserted during the selftest, although concurrent
59  * insertions into the ring-buffer, such as trace_printk(), could occur
60  * at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63 
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68 
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73 
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76 	{ }
77 };
78 
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82 	return 0;
83 }
84 
85 /*
86  * To prevent the comm cache from being overwritten when no
87  * tracing is active, only save the comm when a trace event
88  * occurred.
89  */
90 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
91 
92 /*
93  * Kill all tracing for good (never come back).
94  * It is initialized to 1 but will be set to zero if the initialization
95  * of the tracer is successful. That is the only place that ever sets
96  * it back to zero.
97  */
98 static int tracing_disabled = 1;
99 
100 cpumask_var_t __read_mostly	tracing_buffer_mask;
101 
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputting them to a
109  * serial console.
110  *
111  * It is off by default, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" on the kernel command line, or by setting
113  * /proc/sys/kernel/ftrace_dump_on_oops
114  * Set 1 if you want to dump the buffers of all CPUs
115  * Set 2 if you want to dump the buffer of the CPU that triggered the oops
116  */
117 
118 enum ftrace_dump_mode ftrace_dump_on_oops;
119 
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122 
123 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
124 /* Map of enums to their values, for "eval_map" file */
125 struct trace_eval_map_head {
126 	struct module			*mod;
127 	unsigned long			length;
128 };
129 
130 union trace_eval_map_item;
131 
132 struct trace_eval_map_tail {
133 	/*
134 	 * "end" is first and points to NULL as it must be different
135 	 * than "mod" or "eval_string"
136 	 */
137 	union trace_eval_map_item	*next;
138 	const char			*end;	/* points to NULL */
139 };
140 
141 static DEFINE_MUTEX(trace_eval_mutex);
142 
143 /*
144  * The trace_eval_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved eval_map items.
149  */
150 union trace_eval_map_item {
151 	struct trace_eval_map		map;
152 	struct trace_eval_map_head	head;
153 	struct trace_eval_map_tail	tail;
154 };
155 
156 static union trace_eval_map_item *trace_eval_maps;
157 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
158 
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160 
161 #define MAX_TRACER_SIZE		100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164 
165 static bool allocate_snapshot;
166 
167 static int __init set_cmdline_ftrace(char *str)
168 {
169 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170 	default_bootup_tracer = bootup_tracer_buf;
171 	/* We are using ftrace early, expand it */
172 	ring_buffer_expanded = true;
173 	return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176 
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179 	if (*str++ != '=' || !*str) {
180 		ftrace_dump_on_oops = DUMP_ALL;
181 		return 1;
182 	}
183 
184 	if (!strcmp("orig_cpu", str)) {
185 		ftrace_dump_on_oops = DUMP_ORIG;
186 		return 1;
187 	}
188 
189 	return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192 
193 static int __init stop_trace_on_warning(char *str)
194 {
195 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196 		__disable_trace_on_warning = 1;
197 	return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200 
201 static int __init boot_alloc_snapshot(char *str)
202 {
203 	allocate_snapshot = true;
204 	/* We also need the main ring buffer expanded */
205 	ring_buffer_expanded = true;
206 	return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209 
210 
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212 
213 static int __init set_trace_boot_options(char *str)
214 {
215 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216 	return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219 
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222 
223 static int __init set_trace_boot_clock(char *str)
224 {
225 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226 	trace_boot_clock = trace_boot_clock_buf;
227 	return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230 
231 static int __init set_tracepoint_printk(char *str)
232 {
233 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234 		tracepoint_printk = 1;
235 	return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238 
239 unsigned long long ns2usecs(u64 nsec)
240 {
241 	nsec += 500;
242 	do_div(nsec, 1000);
243 	return nsec;
244 }
245 
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS						\
248 	(FUNCTION_DEFAULT_FLAGS |					\
249 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
250 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
251 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
252 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253 
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
256 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257 
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
261 
262 /*
263  * The global_trace is the descriptor that holds the top-level tracing
264  * buffers for the live tracing.
265  */
266 static struct trace_array global_trace = {
267 	.trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269 
270 LIST_HEAD(ftrace_trace_arrays);
271 
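/*
 * Look up @this_tr in the list of registered trace arrays and, if found,
 * take a reference on it under trace_types_lock. Returns 0 on success,
 * or -ENODEV if the trace array is not (or no longer) registered.
 */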
272 int trace_array_get(struct trace_array *this_tr)
273 {
274 	struct trace_array *tr;
275 	int ret = -ENODEV;
276 
277 	mutex_lock(&trace_types_lock);
278 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279 		if (tr == this_tr) {
280 			tr->ref++;
281 			ret = 0;
282 			break;
283 		}
284 	}
285 	mutex_unlock(&trace_types_lock);
286 
287 	return ret;
288 }
289 
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292 	WARN_ON(!this_tr->ref);
293 	this_tr->ref--;
294 }
295 
296 void trace_array_put(struct trace_array *this_tr)
297 {
298 	mutex_lock(&trace_types_lock);
299 	__trace_array_put(this_tr);
300 	mutex_unlock(&trace_types_lock);
301 }
302 
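/*
 * If @call has an event filter attached and @rec does not match it,
 * discard the already-reserved @event from @buffer and return 1;
 * otherwise return 0 so the caller commits the event as usual.
 */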
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304 			      struct ring_buffer *buffer,
305 			      struct ring_buffer_event *event)
306 {
307 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308 	    !filter_match_preds(call->filter, rec)) {
309 		__trace_event_discard_commit(buffer, event);
310 		return 1;
311 	}
312 
313 	return 0;
314 }
315 
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318 	vfree(pid_list->pids);
319 	kfree(pid_list);
320 }
321 
322 /**
323  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324  * @filtered_pids: The list of pids to check
325  * @search_pid: The PID to find in @filtered_pids
326  *
327  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328  */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332 	/*
333 	 * If pid_max changed after filtered_pids was created, we
334 	 * by default ignore all pids greater than the previous pid_max.
335 	 */
336 	if (search_pid >= filtered_pids->pid_max)
337 		return false;
338 
339 	return test_bit(search_pid, filtered_pids->pids);
340 }
341 
342 /**
343  * trace_ignore_this_task - should a task be ignored for tracing
344  * @filtered_pids: The list of pids to check
345  * @task: The task that should be ignored if not filtered
346  *
347  * Checks if @task should be traced or not from @filtered_pids.
348  * Returns true if @task should *NOT* be traced.
349  * Returns false if @task should be traced.
350  */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354 	/*
355 	 * Return false, because if filtered_pids does not exist,
356 	 * all pids are good to trace.
357 	 */
358 	if (!filtered_pids)
359 		return false;
360 
361 	return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363 
364 /**
365  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
366  * @pid_list: The list to modify
367  * @self: The current task for fork or NULL for exit
368  * @task: The task to add or remove
369  *
370  * If adding a task, if @self is defined, the task is only added if @self
371  * is also included in @pid_list. This happens on fork and tasks should
372  * only be added when the parent is listed. If @self is NULL, then the
373  * @task pid will be removed from the list, which would happen on exit
374  * of a task.
375  */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377 				  struct task_struct *self,
378 				  struct task_struct *task)
379 {
380 	if (!pid_list)
381 		return;
382 
383 	/* For forks, we only add if the forking task is listed */
384 	if (self) {
385 		if (!trace_find_filtered_pid(pid_list, self->pid))
386 			return;
387 	}
388 
389 	/* Sorry, but we don't support pid_max changing after setting */
390 	if (task->pid >= pid_list->pid_max)
391 		return;
392 
393 	/* "self" is set for forks, and NULL for exits */
394 	if (self)
395 		set_bit(task->pid, pid_list->pids);
396 	else
397 		clear_bit(task->pid, pid_list->pids);
398 }
399 
400 /**
401  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402  * @pid_list: The pid list to show
403  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
404  * @pos: The position of the file
405  *
406  * This is used by the seq_file "next" operation to iterate the pids
407  * listed in a trace_pid_list structure.
408  *
409  * Returns the pid+1 as we want to display pid of zero, but NULL would
410  * stop the iteration.
411  */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414 	unsigned long pid = (unsigned long)v;
415 
416 	(*pos)++;
417 
418 	/* pid already is +1 of the actual previous bit */
419 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420 
421 	/* Return pid + 1 to allow zero to be represented */
422 	if (pid < pid_list->pid_max)
423 		return (void *)(pid + 1);
424 
425 	return NULL;
426 }
427 
428 /**
429  * trace_pid_start - Used for seq_file to start reading pid lists
430  * @pid_list: The pid list to show
431  * @pos: The position of the file
432  *
433  * This is used by seq_file "start" operation to start the iteration
434  * of listing pids.
435  *
436  * Returns the pid+1 as we want to display pid of zero, but NULL would
437  * stop the iteration.
438  */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441 	unsigned long pid;
442 	loff_t l = 0;
443 
444 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445 	if (pid >= pid_list->pid_max)
446 		return NULL;
447 
448 	/* Return pid + 1 so that zero can be the exit value */
449 	for (pid++; pid && l < *pos;
450 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451 		;
452 	return (void *)pid;
453 }
454 
455 /**
456  * trace_pid_show - show the current pid in seq_file processing
457  * @m: The seq_file structure to write into
458  * @v: A void pointer of the pid (+1) value to display
459  *
460  * Can be directly used by seq_file operations to display the current
461  * pid value.
462  */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465 	unsigned long pid = (unsigned long)v - 1;
466 
467 	seq_printf(m, "%lu\n", pid);
468 	return 0;
469 }
470 
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE		127
473 
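/*
 * trace_pid_write - parse a user-supplied list of pids into a trace_pid_list
 * @filtered_pids: the current pid list, whose bits are copied into the new one
 * @new_pid_list: where the newly allocated list is stored (NULL if cleared)
 * @ubuf: user buffer holding whitespace-separated pid numbers
 * @cnt: number of bytes in @ubuf
 *
 * A completely new pid list is built so the update is all-or-nothing.
 * Returns the number of bytes consumed, or a negative error code.
 */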
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475 		    struct trace_pid_list **new_pid_list,
476 		    const char __user *ubuf, size_t cnt)
477 {
478 	struct trace_pid_list *pid_list;
479 	struct trace_parser parser;
480 	unsigned long val;
481 	int nr_pids = 0;
482 	ssize_t read = 0;
483 	ssize_t ret = 0;
484 	loff_t pos;
485 	pid_t pid;
486 
487 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488 		return -ENOMEM;
489 
490 	/*
491 	 * Always recreate a new array. The write is an all or nothing
492 	 * operation. Always create a new array when adding new pids by
493 	 * the user. If the operation fails, then the current list is
494 	 * not modified.
495 	 */
496 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497 	if (!pid_list) {
498 		trace_parser_put(&parser);
499 		return -ENOMEM;
500 	}
501 
502 	pid_list->pid_max = READ_ONCE(pid_max);
503 
504 	/* Only truncating will shrink pid_max */
505 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
506 		pid_list->pid_max = filtered_pids->pid_max;
507 
508 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
509 	if (!pid_list->pids) {
510 		trace_parser_put(&parser);
511 		kfree(pid_list);
512 		return -ENOMEM;
513 	}
514 
515 	if (filtered_pids) {
516 		/* copy the current bits to the new max */
517 		for_each_set_bit(pid, filtered_pids->pids,
518 				 filtered_pids->pid_max) {
519 			set_bit(pid, pid_list->pids);
520 			nr_pids++;
521 		}
522 	}
523 
524 	while (cnt > 0) {
525 
526 		pos = 0;
527 
528 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
529 		if (ret < 0 || !trace_parser_loaded(&parser))
530 			break;
531 
532 		read += ret;
533 		ubuf += ret;
534 		cnt -= ret;
535 
536 		parser.buffer[parser.idx] = 0;
537 
538 		ret = -EINVAL;
539 		if (kstrtoul(parser.buffer, 0, &val))
540 			break;
541 		if (val >= pid_list->pid_max)
542 			break;
543 
544 		pid = (pid_t)val;
545 
546 		set_bit(pid, pid_list->pids);
547 		nr_pids++;
548 
549 		trace_parser_clear(&parser);
550 		ret = 0;
551 	}
552 	trace_parser_put(&parser);
553 
554 	if (ret < 0) {
555 		trace_free_pid_list(pid_list);
556 		return ret;
557 	}
558 
559 	if (!nr_pids) {
560 		/* Cleared the list of pids */
561 		trace_free_pid_list(pid_list);
562 		read = ret;
563 		pid_list = NULL;
564 	}
565 
566 	*new_pid_list = pid_list;
567 
568 	return read;
569 }
570 
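/*
 * Return the current, normalized timestamp of @buf's ring buffer for @cpu.
 * Falls back to trace_clock_local() during early boot, before the ring
 * buffer has been allocated.
 */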
571 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
572 {
573 	u64 ts;
574 
575 	/* Early boot up does not have a buffer yet */
576 	if (!buf->buffer)
577 		return trace_clock_local();
578 
579 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
580 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
581 
582 	return ts;
583 }
584 
585 u64 ftrace_now(int cpu)
586 {
587 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
588 }
589 
590 /**
591  * tracing_is_enabled - Show if global_trace has been disabled
592  *
593  * Shows if the global trace has been enabled or not. It uses the
594  * mirror flag "buffer_disabled" to be used in fast paths such as for
595  * the irqsoff tracer. But it may be inaccurate due to races. If you
596  * need to know the accurate state, use tracing_is_on() which is a little
597  * slower, but accurate.
598  */
599 int tracing_is_enabled(void)
600 {
601 	/*
602 	 * For quick access (irqsoff uses this in fast path), just
603 	 * return the mirror variable of the state of the ring buffer.
604 	 * It's a little racy, but we don't really care.
605 	 */
606 	smp_rmb();
607 	return !global_trace.buffer_disabled;
608 }
609 
610 /*
611  * trace_buf_size is the size in bytes that is allocated
612  * for a buffer. Note, the number of bytes is always rounded
613  * to page size.
614  *
615  * This number is purposely set to a low number of 16384.
616  * If the dump on oops happens, it will be much appreciated
617  * to not have to wait for all that output. Anyway, this is
618  * configurable at both boot time and run time.
619  */
620 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
621 
622 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
623 
624 /* trace_types holds a link list of available tracers. */
625 static struct tracer		*trace_types __read_mostly;
626 
627 /*
628  * trace_types_lock is used to protect the trace_types list.
629  */
630 DEFINE_MUTEX(trace_types_lock);
631 
632 /*
633  * serialize the access of the ring buffer
634  *
635  * The ring buffer serializes readers, but that is only low level protection.
636  * The validity of the events (as returned by ring_buffer_peek() ..etc)
637  * is not protected by the ring buffer.
638  *
639  * The content of events may become garbage if we allow other processes to
640  * consume these events concurrently:
641  *   A) the page of the consumed events may become a normal page
642  *      (not a reader page) in the ring buffer, and this page will be rewritten
643  *      by the events producer.
644  *   B) The page of the consumed events may become a page for splice_read,
645  *      and this page will be returned to the system.
646  *
647  * These primitives allow multiple processes to access different cpu ring
648  * buffers concurrently.
649  *
650  * These primitives don't distinguish read-only and read-consume access.
651  * Multiple read-only accesses are also serialized.
652  */
653 
654 #ifdef CONFIG_SMP
655 static DECLARE_RWSEM(all_cpu_access_lock);
656 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
657 
658 static inline void trace_access_lock(int cpu)
659 {
660 	if (cpu == RING_BUFFER_ALL_CPUS) {
661 		/* gain it for accessing the whole ring buffer. */
662 		down_write(&all_cpu_access_lock);
663 	} else {
664 		/* gain it for accessing a cpu ring buffer. */
665 
666 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
667 		down_read(&all_cpu_access_lock);
668 
669 		/* Secondly block other access to this @cpu ring buffer. */
670 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
671 	}
672 }
673 
674 static inline void trace_access_unlock(int cpu)
675 {
676 	if (cpu == RING_BUFFER_ALL_CPUS) {
677 		up_write(&all_cpu_access_lock);
678 	} else {
679 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
680 		up_read(&all_cpu_access_lock);
681 	}
682 }
683 
684 static inline void trace_access_lock_init(void)
685 {
686 	int cpu;
687 
688 	for_each_possible_cpu(cpu)
689 		mutex_init(&per_cpu(cpu_access_lock, cpu));
690 }
691 
692 #else
693 
694 static DEFINE_MUTEX(access_lock);
695 
696 static inline void trace_access_lock(int cpu)
697 {
698 	(void)cpu;
699 	mutex_lock(&access_lock);
700 }
701 
702 static inline void trace_access_unlock(int cpu)
703 {
704 	(void)cpu;
705 	mutex_unlock(&access_lock);
706 }
707 
708 static inline void trace_access_lock_init(void)
709 {
710 }
711 
712 #endif
713 
714 #ifdef CONFIG_STACKTRACE
715 static void __ftrace_trace_stack(struct ring_buffer *buffer,
716 				 unsigned long flags,
717 				 int skip, int pc, struct pt_regs *regs);
718 static inline void ftrace_trace_stack(struct trace_array *tr,
719 				      struct ring_buffer *buffer,
720 				      unsigned long flags,
721 				      int skip, int pc, struct pt_regs *regs);
722 
723 #else
724 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
725 					unsigned long flags,
726 					int skip, int pc, struct pt_regs *regs)
727 {
728 }
729 static inline void ftrace_trace_stack(struct trace_array *tr,
730 				      struct ring_buffer *buffer,
731 				      unsigned long flags,
732 				      int skip, int pc, struct pt_regs *regs)
733 {
734 }
735 
736 #endif
737 
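/*
 * Fill in the common fields (irq flags, preempt count, entry type) of a
 * freshly reserved ring buffer event.
 */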
738 static __always_inline void
739 trace_event_setup(struct ring_buffer_event *event,
740 		  int type, unsigned long flags, int pc)
741 {
742 	struct trace_entry *ent = ring_buffer_event_data(event);
743 
744 	tracing_generic_entry_update(ent, flags, pc);
745 	ent->type = type;
746 }
747 
748 static __always_inline struct ring_buffer_event *
749 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
750 			  int type,
751 			  unsigned long len,
752 			  unsigned long flags, int pc)
753 {
754 	struct ring_buffer_event *event;
755 
756 	event = ring_buffer_lock_reserve(buffer, len);
757 	if (event != NULL)
758 		trace_event_setup(event, type, flags, pc);
759 
760 	return event;
761 }
762 
763 void tracer_tracing_on(struct trace_array *tr)
764 {
765 	if (tr->trace_buffer.buffer)
766 		ring_buffer_record_on(tr->trace_buffer.buffer);
767 	/*
768 	 * This flag is looked at when buffers haven't been allocated
769 	 * yet, or by some tracers (like irqsoff), that just want to
770 	 * know if the ring buffer has been disabled, but it can handle
771 	 * races of where it gets disabled but we still do a record.
772 	 * As the check is in the fast path of the tracers, it is more
773 	 * important to be fast than accurate.
774 	 */
775 	tr->buffer_disabled = 0;
776 	/* Make the flag seen by readers */
777 	smp_wmb();
778 }
779 
780 /**
781  * tracing_on - enable tracing buffers
782  *
783  * This function enables tracing buffers that may have been
784  * disabled with tracing_off.
785  */
786 void tracing_on(void)
787 {
788 	tracer_tracing_on(&global_trace);
789 }
790 EXPORT_SYMBOL_GPL(tracing_on);
791 
792 
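/*
 * Commit a reserved event. If the event came from the per-cpu temporary
 * trace_buffered_event buffer, its data must be copied into the real ring
 * buffer with ring_buffer_write() and the temporary buffer released;
 * otherwise a plain ring_buffer_unlock_commit() is enough.
 */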
793 static __always_inline void
794 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
795 {
796 	__this_cpu_write(trace_taskinfo_save, true);
797 
798 	/* If this is the temp buffer, we need to commit fully */
799 	if (this_cpu_read(trace_buffered_event) == event) {
800 		/* Length is in event->array[0] */
801 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
802 		/* Release the temp buffer */
803 		this_cpu_dec(trace_buffered_event_cnt);
804 	} else
805 		ring_buffer_unlock_commit(buffer, event);
806 }
807 
808 /**
809  * __trace_puts - write a constant string into the trace buffer.
810  * @ip:	   The address of the caller
811  * @str:   The constant string to write
812  * @size:  The size of the string.
813  */
814 int __trace_puts(unsigned long ip, const char *str, int size)
815 {
816 	struct ring_buffer_event *event;
817 	struct ring_buffer *buffer;
818 	struct print_entry *entry;
819 	unsigned long irq_flags;
820 	int alloc;
821 	int pc;
822 
823 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
824 		return 0;
825 
826 	pc = preempt_count();
827 
828 	if (unlikely(tracing_selftest_running || tracing_disabled))
829 		return 0;
830 
831 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
832 
833 	local_save_flags(irq_flags);
834 	buffer = global_trace.trace_buffer.buffer;
835 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
836 					    irq_flags, pc);
837 	if (!event)
838 		return 0;
839 
840 	entry = ring_buffer_event_data(event);
841 	entry->ip = ip;
842 
843 	memcpy(&entry->buf, str, size);
844 
845 	/* Add a newline if necessary */
846 	if (entry->buf[size - 1] != '\n') {
847 		entry->buf[size] = '\n';
848 		entry->buf[size + 1] = '\0';
849 	} else
850 		entry->buf[size] = '\0';
851 
852 	__buffer_unlock_commit(buffer, event);
853 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
854 
855 	return size;
856 }
857 EXPORT_SYMBOL_GPL(__trace_puts);
858 
859 /**
860  * __trace_bputs - write the pointer to a constant string into trace buffer
861  * @ip:	   The address of the caller
862  * @str:   The constant string to write to the buffer to
863  */
864 int __trace_bputs(unsigned long ip, const char *str)
865 {
866 	struct ring_buffer_event *event;
867 	struct ring_buffer *buffer;
868 	struct bputs_entry *entry;
869 	unsigned long irq_flags;
870 	int size = sizeof(struct bputs_entry);
871 	int pc;
872 
873 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
874 		return 0;
875 
876 	pc = preempt_count();
877 
878 	if (unlikely(tracing_selftest_running || tracing_disabled))
879 		return 0;
880 
881 	local_save_flags(irq_flags);
882 	buffer = global_trace.trace_buffer.buffer;
883 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
884 					    irq_flags, pc);
885 	if (!event)
886 		return 0;
887 
888 	entry = ring_buffer_event_data(event);
889 	entry->ip			= ip;
890 	entry->str			= str;
891 
892 	__buffer_unlock_commit(buffer, event);
893 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
894 
895 	return 1;
896 }
897 EXPORT_SYMBOL_GPL(__trace_bputs);
898 
899 #ifdef CONFIG_TRACER_SNAPSHOT
900 void tracing_snapshot_instance(struct trace_array *tr)
901 {
902 	struct tracer *tracer = tr->current_trace;
903 	unsigned long flags;
904 
905 	if (in_nmi()) {
906 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
907 		internal_trace_puts("*** snapshot is being ignored        ***\n");
908 		return;
909 	}
910 
911 	if (!tr->allocated_snapshot) {
912 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
913 		internal_trace_puts("*** stopping trace here!   ***\n");
914 		tracing_off();
915 		return;
916 	}
917 
918 	/* Note, snapshot can not be used when the tracer uses it */
919 	if (tracer->use_max_tr) {
920 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
921 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
922 		return;
923 	}
924 
925 	local_irq_save(flags);
926 	update_max_tr(tr, current, smp_processor_id());
927 	local_irq_restore(flags);
928 }
929 
930 /**
931  * trace_snapshot - take a snapshot of the current buffer.
932  *
933  * This causes a swap between the snapshot buffer and the current live
934  * tracing buffer. You can use this to take snapshots of the live
935  * trace when some condition is triggered, but continue to trace.
936  *
937  * Note, make sure to allocate the snapshot with either
938  * a tracing_snapshot_alloc(), or by doing it manually
939  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
940  *
941  * If the snapshot buffer is not allocated, it will stop tracing.
942  * Basically making a permanent snapshot.
943  */
944 void tracing_snapshot(void)
945 {
946 	struct trace_array *tr = &global_trace;
947 
948 	tracing_snapshot_instance(tr);
949 }
950 EXPORT_SYMBOL_GPL(tracing_snapshot);
951 
952 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
953 					struct trace_buffer *size_buf, int cpu_id);
954 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
955 
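/*
 * Allocate the snapshot (max) buffer for @tr, if it has not been allocated
 * yet, by resizing it to match the size of the live trace buffer.
 */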
956 int tracing_alloc_snapshot_instance(struct trace_array *tr)
957 {
958 	int ret;
959 
960 	if (!tr->allocated_snapshot) {
961 
962 		/* allocate spare buffer */
963 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
964 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
965 		if (ret < 0)
966 			return ret;
967 
968 		tr->allocated_snapshot = true;
969 	}
970 
971 	return 0;
972 }
973 
974 static void free_snapshot(struct trace_array *tr)
975 {
976 	/*
977 	 * We don't free the ring buffer; instead, we resize it because the
978 	 * max_tr ring buffer has some state (e.g. ring->clock) and
979 	 * we want to preserve it.
980 	 */
981 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
982 	set_buffer_entries(&tr->max_buffer, 1);
983 	tracing_reset_online_cpus(&tr->max_buffer);
984 	tr->allocated_snapshot = false;
985 }
986 
987 /**
988  * tracing_alloc_snapshot - allocate snapshot buffer.
989  *
990  * This only allocates the snapshot buffer if it isn't already
991  * allocated - it doesn't also take a snapshot.
992  *
993  * This is meant to be used in cases where the snapshot buffer needs
994  * to be set up for events that can't sleep but need to be able to
995  * trigger a snapshot.
996  */
997 int tracing_alloc_snapshot(void)
998 {
999 	struct trace_array *tr = &global_trace;
1000 	int ret;
1001 
1002 	ret = tracing_alloc_snapshot_instance(tr);
1003 	WARN_ON(ret < 0);
1004 
1005 	return ret;
1006 }
1007 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1008 
1009 /**
1010  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
1011  *
1012  * This is similar to trace_snapshot(), but it will allocate the
1013  * snapshot buffer if it isn't already allocated. Use this only
1014  * where it is safe to sleep, as the allocation may sleep.
1015  *
1016  * This causes a swap between the snapshot buffer and the current live
1017  * tracing buffer. You can use this to take snapshots of the live
1018  * trace when some condition is triggered, but continue to trace.
1019  */
1020 void tracing_snapshot_alloc(void)
1021 {
1022 	int ret;
1023 
1024 	ret = tracing_alloc_snapshot();
1025 	if (ret < 0)
1026 		return;
1027 
1028 	tracing_snapshot();
1029 }
1030 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1031 #else
1032 void tracing_snapshot(void)
1033 {
1034 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1035 }
1036 EXPORT_SYMBOL_GPL(tracing_snapshot);
1037 int tracing_alloc_snapshot(void)
1038 {
1039 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1040 	return -ENODEV;
1041 }
1042 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1043 void tracing_snapshot_alloc(void)
1044 {
1045 	/* Give warning */
1046 	tracing_snapshot();
1047 }
1048 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1049 #endif /* CONFIG_TRACER_SNAPSHOT */
1050 
1051 void tracer_tracing_off(struct trace_array *tr)
1052 {
1053 	if (tr->trace_buffer.buffer)
1054 		ring_buffer_record_off(tr->trace_buffer.buffer);
1055 	/*
1056 	 * This flag is looked at when buffers haven't been allocated
1057 	 * yet, or by some tracers (like irqsoff), that just want to
1058 	 * know if the ring buffer has been disabled, but it can handle
1059 	 * races of where it gets disabled but we still do a record.
1060 	 * As the check is in the fast path of the tracers, it is more
1061 	 * important to be fast than accurate.
1062 	 */
1063 	tr->buffer_disabled = 1;
1064 	/* Make the flag seen by readers */
1065 	smp_wmb();
1066 }
1067 
1068 /**
1069  * tracing_off - turn off tracing buffers
1070  *
1071  * This function stops the tracing buffers from recording data.
1072  * It does not disable any overhead the tracers themselves may
1073  * be causing. This function simply causes all recording to
1074  * the ring buffers to fail.
1075  */
1076 void tracing_off(void)
1077 {
1078 	tracer_tracing_off(&global_trace);
1079 }
1080 EXPORT_SYMBOL_GPL(tracing_off);
1081 
1082 void disable_trace_on_warning(void)
1083 {
1084 	if (__disable_trace_on_warning)
1085 		tracing_off();
1086 }
1087 
1088 /**
1089  * tracer_tracing_is_on - show real state of ring buffer enabled
1090  * @tr : the trace array to know if ring buffer is enabled
1091  *
1092  * Shows real state of the ring buffer if it is enabled or not.
1093  */
1094 int tracer_tracing_is_on(struct trace_array *tr)
1095 {
1096 	if (tr->trace_buffer.buffer)
1097 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1098 	return !tr->buffer_disabled;
1099 }
1100 
1101 /**
1102  * tracing_is_on - show state of ring buffers enabled
1103  */
1104 int tracing_is_on(void)
1105 {
1106 	return tracer_tracing_is_on(&global_trace);
1107 }
1108 EXPORT_SYMBOL_GPL(tracing_is_on);
1109 
1110 static int __init set_buf_size(char *str)
1111 {
1112 	unsigned long buf_size;
1113 
1114 	if (!str)
1115 		return 0;
1116 	buf_size = memparse(str, &str);
1117 	/* nr_entries can not be zero */
1118 	if (buf_size == 0)
1119 		return 0;
1120 	trace_buf_size = buf_size;
1121 	return 1;
1122 }
1123 __setup("trace_buf_size=", set_buf_size);
1124 
1125 static int __init set_tracing_thresh(char *str)
1126 {
1127 	unsigned long threshold;
1128 	int ret;
1129 
1130 	if (!str)
1131 		return 0;
1132 	ret = kstrtoul(str, 0, &threshold);
1133 	if (ret < 0)
1134 		return 0;
1135 	tracing_thresh = threshold * 1000;
1136 	return 1;
1137 }
1138 __setup("tracing_thresh=", set_tracing_thresh);
1139 
1140 unsigned long nsecs_to_usecs(unsigned long nsecs)
1141 {
1142 	return nsecs / 1000;
1143 }
1144 
1145 /*
1146  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1147  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1148  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1149  * of strings in the order that the evals (enum) were defined.
1150  */
1151 #undef C
1152 #define C(a, b) b
1153 
1154 /* These must match the bit positions in trace_iterator_flags */
1155 static const char *trace_options[] = {
1156 	TRACE_FLAGS
1157 	NULL
1158 };
1159 
1160 static struct {
1161 	u64 (*func)(void);
1162 	const char *name;
1163 	int in_ns;		/* is this clock in nanoseconds? */
1164 } trace_clocks[] = {
1165 	{ trace_clock_local,		"local",	1 },
1166 	{ trace_clock_global,		"global",	1 },
1167 	{ trace_clock_counter,		"counter",	0 },
1168 	{ trace_clock_jiffies,		"uptime",	0 },
1169 	{ trace_clock,			"perf",		1 },
1170 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1171 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1172 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1173 	ARCH_TRACE_CLOCKS
1174 };
1175 
1176 /*
1177  * trace_parser_get_init - gets the buffer for trace parser
1178  */
1179 int trace_parser_get_init(struct trace_parser *parser, int size)
1180 {
1181 	memset(parser, 0, sizeof(*parser));
1182 
1183 	parser->buffer = kmalloc(size, GFP_KERNEL);
1184 	if (!parser->buffer)
1185 		return 1;
1186 
1187 	parser->size = size;
1188 	return 0;
1189 }
1190 
1191 /*
1192  * trace_parser_put - frees the buffer for trace parser
1193  */
1194 void trace_parser_put(struct trace_parser *parser)
1195 {
1196 	kfree(parser->buffer);
1197 	parser->buffer = NULL;
1198 }
1199 
1200 /*
1201  * trace_get_user - reads the user input string separated by space
1202  * (matched by isspace(ch))
1203  *
1204  * For each string found the 'struct trace_parser' is updated,
1205  * and the function returns.
1206  *
1207  * Returns number of bytes read.
1208  *
1209  * See kernel/trace/trace.h for 'struct trace_parser' details.
1210  */
1211 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1212 	size_t cnt, loff_t *ppos)
1213 {
1214 	char ch;
1215 	size_t read = 0;
1216 	ssize_t ret;
1217 
1218 	if (!*ppos)
1219 		trace_parser_clear(parser);
1220 
1221 	ret = get_user(ch, ubuf++);
1222 	if (ret)
1223 		goto out;
1224 
1225 	read++;
1226 	cnt--;
1227 
1228 	/*
1229 	 * The parser is not finished with the last write,
1230 	 * continue reading the user input without skipping spaces.
1231 	 */
1232 	if (!parser->cont) {
1233 		/* skip white space */
1234 		while (cnt && isspace(ch)) {
1235 			ret = get_user(ch, ubuf++);
1236 			if (ret)
1237 				goto out;
1238 			read++;
1239 			cnt--;
1240 		}
1241 
1242 		/* only spaces were written */
1243 		if (isspace(ch)) {
1244 			*ppos += read;
1245 			ret = read;
1246 			goto out;
1247 		}
1248 
1249 		parser->idx = 0;
1250 	}
1251 
1252 	/* read the non-space input */
1253 	while (cnt && !isspace(ch)) {
1254 		if (parser->idx < parser->size - 1)
1255 			parser->buffer[parser->idx++] = ch;
1256 		else {
1257 			ret = -EINVAL;
1258 			goto out;
1259 		}
1260 		ret = get_user(ch, ubuf++);
1261 		if (ret)
1262 			goto out;
1263 		read++;
1264 		cnt--;
1265 	}
1266 
1267 	/* We either got finished input or we have to wait for another call. */
1268 	if (isspace(ch)) {
1269 		parser->buffer[parser->idx] = 0;
1270 		parser->cont = false;
1271 	} else if (parser->idx < parser->size - 1) {
1272 		parser->cont = true;
1273 		parser->buffer[parser->idx++] = ch;
1274 	} else {
1275 		ret = -EINVAL;
1276 		goto out;
1277 	}
1278 
1279 	*ppos += read;
1280 	ret = read;
1281 
1282 out:
1283 	return ret;
1284 }
1285 
1286 /* TODO add a seq_buf_to_buffer() */
1287 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1288 {
1289 	int len;
1290 
1291 	if (trace_seq_used(s) <= s->seq.readpos)
1292 		return -EBUSY;
1293 
1294 	len = trace_seq_used(s) - s->seq.readpos;
1295 	if (cnt > len)
1296 		cnt = len;
1297 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1298 
1299 	s->seq.readpos += cnt;
1300 	return cnt;
1301 }
1302 
1303 unsigned long __read_mostly	tracing_thresh;
1304 
1305 #ifdef CONFIG_TRACER_MAX_TRACE
1306 /*
1307  * Copy the new maximum trace into the separate maximum-trace
1308  * structure. (this way the maximum trace is permanently saved,
1309  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1310  */
1311 static void
1312 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1313 {
1314 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1315 	struct trace_buffer *max_buf = &tr->max_buffer;
1316 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1317 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1318 
1319 	max_buf->cpu = cpu;
1320 	max_buf->time_start = data->preempt_timestamp;
1321 
1322 	max_data->saved_latency = tr->max_latency;
1323 	max_data->critical_start = data->critical_start;
1324 	max_data->critical_end = data->critical_end;
1325 
1326 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1327 	max_data->pid = tsk->pid;
1328 	/*
1329 	 * If tsk == current, then use current_uid(), as that does not use
1330 	 * RCU. The irq tracer can be called out of RCU scope.
1331 	 */
1332 	if (tsk == current)
1333 		max_data->uid = current_uid();
1334 	else
1335 		max_data->uid = task_uid(tsk);
1336 
1337 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1338 	max_data->policy = tsk->policy;
1339 	max_data->rt_priority = tsk->rt_priority;
1340 
1341 	/* record this tasks comm */
1342 	tracing_record_cmdline(tsk);
1343 }
1344 
1345 /**
1346  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1347  * @tr: tracer
1348  * @tsk: the task with the latency
1349  * @cpu: The cpu that initiated the trace.
1350  *
1351  * Flip the buffers between the @tr and the max_tr and record information
1352  * about which task was the cause of this latency.
1353  */
1354 void
1355 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1356 {
1357 	struct ring_buffer *buf;
1358 
1359 	if (tr->stop_count)
1360 		return;
1361 
1362 	WARN_ON_ONCE(!irqs_disabled());
1363 
1364 	if (!tr->allocated_snapshot) {
1365 		/* Only the nop tracer should hit this when disabling */
1366 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1367 		return;
1368 	}
1369 
1370 	arch_spin_lock(&tr->max_lock);
1371 
1372 	/* Inherit the recordable setting from trace_buffer */
1373 	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1374 		ring_buffer_record_on(tr->max_buffer.buffer);
1375 	else
1376 		ring_buffer_record_off(tr->max_buffer.buffer);
1377 
1378 	buf = tr->trace_buffer.buffer;
1379 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1380 	tr->max_buffer.buffer = buf;
1381 
1382 	__update_max_tr(tr, tsk, cpu);
1383 	arch_spin_unlock(&tr->max_lock);
1384 }
1385 
1386 /**
1387  * update_max_tr_single - only copy one trace over, and reset the rest
1388  * @tr - tracer
1389  * @tsk - task with the latency
1390  * @cpu - the cpu of the buffer to copy.
1391  *
1392  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1393  */
1394 void
1395 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1396 {
1397 	int ret;
1398 
1399 	if (tr->stop_count)
1400 		return;
1401 
1402 	WARN_ON_ONCE(!irqs_disabled());
1403 	if (!tr->allocated_snapshot) {
1404 		/* Only the nop tracer should hit this when disabling */
1405 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1406 		return;
1407 	}
1408 
1409 	arch_spin_lock(&tr->max_lock);
1410 
1411 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1412 
1413 	if (ret == -EBUSY) {
1414 		/*
1415 		 * We failed to swap the buffer due to a commit taking
1416 		 * place on this CPU. We fail to record, but we reset
1417 		 * the max trace buffer (no one writes directly to it)
1418 		 * and flag that it failed.
1419 		 */
1420 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1421 			"Failed to swap buffers due to commit in progress\n");
1422 	}
1423 
1424 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1425 
1426 	__update_max_tr(tr, tsk, cpu);
1427 	arch_spin_unlock(&tr->max_lock);
1428 }
1429 #endif /* CONFIG_TRACER_MAX_TRACE */
1430 
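/*
 * Block until the ring buffer of iter->cpu_file has data to read (or, when
 * @full is set, until enough data has accumulated). Static iterators never
 * wait: their buffers are either already filled or empty.
 */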
1431 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1432 {
1433 	/* Iterators are static, they should be filled or empty */
1434 	if (trace_buffer_iter(iter, iter->cpu_file))
1435 		return 0;
1436 
1437 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1438 				full);
1439 }
1440 
1441 #ifdef CONFIG_FTRACE_STARTUP_TEST
1442 static bool selftests_can_run;
1443 
1444 struct trace_selftests {
1445 	struct list_head		list;
1446 	struct tracer			*type;
1447 };
1448 
1449 static LIST_HEAD(postponed_selftests);
1450 
1451 static int save_selftest(struct tracer *type)
1452 {
1453 	struct trace_selftests *selftest;
1454 
1455 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1456 	if (!selftest)
1457 		return -ENOMEM;
1458 
1459 	selftest->type = type;
1460 	list_add(&selftest->list, &postponed_selftests);
1461 	return 0;
1462 }
1463 
1464 static int run_tracer_selftest(struct tracer *type)
1465 {
1466 	struct trace_array *tr = &global_trace;
1467 	struct tracer *saved_tracer = tr->current_trace;
1468 	int ret;
1469 
1470 	if (!type->selftest || tracing_selftest_disabled)
1471 		return 0;
1472 
1473 	/*
1474 	 * If a tracer registers early in boot up (before scheduling is
1475 	 * initialized and such), then do not run its selftests yet.
1476 	 * Instead, run it a little later in the boot process.
1477 	 */
1478 	if (!selftests_can_run)
1479 		return save_selftest(type);
1480 
1481 	/*
1482 	 * Run a selftest on this tracer.
1483 	 * Here we reset the trace buffer, and set the current
1484 	 * tracer to be this tracer. The tracer can then run some
1485 	 * internal tracing to verify that everything is in order.
1486 	 * If we fail, we do not register this tracer.
1487 	 */
1488 	tracing_reset_online_cpus(&tr->trace_buffer);
1489 
1490 	tr->current_trace = type;
1491 
1492 #ifdef CONFIG_TRACER_MAX_TRACE
1493 	if (type->use_max_tr) {
1494 		/* If we expanded the buffers, make sure the max is expanded too */
1495 		if (ring_buffer_expanded)
1496 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1497 					   RING_BUFFER_ALL_CPUS);
1498 		tr->allocated_snapshot = true;
1499 	}
1500 #endif
1501 
1502 	/* the test is responsible for initializing and enabling */
1503 	pr_info("Testing tracer %s: ", type->name);
1504 	ret = type->selftest(type, tr);
1505 	/* the test is responsible for resetting too */
1506 	tr->current_trace = saved_tracer;
1507 	if (ret) {
1508 		printk(KERN_CONT "FAILED!\n");
1509 		/* Add the warning after printing 'FAILED' */
1510 		WARN_ON(1);
1511 		return -1;
1512 	}
1513 	/* Only reset on passing, to avoid touching corrupted buffers */
1514 	tracing_reset_online_cpus(&tr->trace_buffer);
1515 
1516 #ifdef CONFIG_TRACER_MAX_TRACE
1517 	if (type->use_max_tr) {
1518 		tr->allocated_snapshot = false;
1519 
1520 		/* Shrink the max buffer again */
1521 		if (ring_buffer_expanded)
1522 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1523 					   RING_BUFFER_ALL_CPUS);
1524 	}
1525 #endif
1526 
1527 	printk(KERN_CONT "PASSED\n");
1528 	return 0;
1529 }
1530 
1531 static __init int init_trace_selftests(void)
1532 {
1533 	struct trace_selftests *p, *n;
1534 	struct tracer *t, **last;
1535 	int ret;
1536 
1537 	selftests_can_run = true;
1538 
1539 	mutex_lock(&trace_types_lock);
1540 
1541 	if (list_empty(&postponed_selftests))
1542 		goto out;
1543 
1544 	pr_info("Running postponed tracer tests:\n");
1545 
1546 	tracing_selftest_running = true;
1547 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1548 		ret = run_tracer_selftest(p->type);
1549 		/* If the test fails, then warn and remove from available_tracers */
1550 		if (ret < 0) {
1551 			WARN(1, "tracer: %s failed selftest, disabling\n",
1552 			     p->type->name);
1553 			last = &trace_types;
1554 			for (t = trace_types; t; t = t->next) {
1555 				if (t == p->type) {
1556 					*last = t->next;
1557 					break;
1558 				}
1559 				last = &t->next;
1560 			}
1561 		}
1562 		list_del(&p->list);
1563 		kfree(p);
1564 	}
1565 	tracing_selftest_running = false;
1566 
1567  out:
1568 	mutex_unlock(&trace_types_lock);
1569 
1570 	return 0;
1571 }
1572 core_initcall(init_trace_selftests);
1573 #else
1574 static inline int run_tracer_selftest(struct tracer *type)
1575 {
1576 	return 0;
1577 }
1578 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1579 
1580 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1581 
1582 static void __init apply_trace_boot_options(void);
1583 
1584 /**
1585  * register_tracer - register a tracer with the ftrace system.
1586  * @type - the plugin for the tracer
1587  *
1588  * Register a new plugin tracer.
1589  */
1590 int __init register_tracer(struct tracer *type)
1591 {
1592 	struct tracer *t;
1593 	int ret = 0;
1594 
1595 	if (!type->name) {
1596 		pr_info("Tracer must have a name\n");
1597 		return -1;
1598 	}
1599 
1600 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1601 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1602 		return -1;
1603 	}
1604 
1605 	mutex_lock(&trace_types_lock);
1606 
1607 	tracing_selftest_running = true;
1608 
1609 	for (t = trace_types; t; t = t->next) {
1610 		if (strcmp(type->name, t->name) == 0) {
1611 			/* already found */
1612 			pr_info("Tracer %s already registered\n",
1613 				type->name);
1614 			ret = -1;
1615 			goto out;
1616 		}
1617 	}
1618 
1619 	if (!type->set_flag)
1620 		type->set_flag = &dummy_set_flag;
1621 	if (!type->flags) {
1622 		/*allocate a dummy tracer_flags*/
1623 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1624 		if (!type->flags) {
1625 			ret = -ENOMEM;
1626 			goto out;
1627 		}
1628 		type->flags->val = 0;
1629 		type->flags->opts = dummy_tracer_opt;
1630 	} else
1631 		if (!type->flags->opts)
1632 			type->flags->opts = dummy_tracer_opt;
1633 
1634 	/* store the tracer for __set_tracer_option */
1635 	type->flags->trace = type;
1636 
1637 	ret = run_tracer_selftest(type);
1638 	if (ret < 0)
1639 		goto out;
1640 
1641 	type->next = trace_types;
1642 	trace_types = type;
1643 	add_tracer_options(&global_trace, type);
1644 
1645  out:
1646 	tracing_selftest_running = false;
1647 	mutex_unlock(&trace_types_lock);
1648 
1649 	if (ret || !default_bootup_tracer)
1650 		goto out_unlock;
1651 
1652 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1653 		goto out_unlock;
1654 
1655 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1656 	/* Do we want this tracer to start on bootup? */
1657 	tracing_set_tracer(&global_trace, type->name);
1658 	default_bootup_tracer = NULL;
1659 
1660 	apply_trace_boot_options();
1661 
1662 	/* disable other selftests, since this will break it. */
1663 	tracing_selftest_disabled = true;
1664 #ifdef CONFIG_FTRACE_STARTUP_TEST
1665 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1666 	       type->name);
1667 #endif
1668 
1669  out_unlock:
1670 	return ret;
1671 }
1672 
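/*
 * Reset (clear) the ring buffer of a single @cpu in @buf. Recording is
 * disabled and all pending commits are allowed to finish before the
 * buffer is actually reset.
 */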
1673 void tracing_reset(struct trace_buffer *buf, int cpu)
1674 {
1675 	struct ring_buffer *buffer = buf->buffer;
1676 
1677 	if (!buffer)
1678 		return;
1679 
1680 	ring_buffer_record_disable(buffer);
1681 
1682 	/* Make sure all commits have finished */
1683 	synchronize_sched();
1684 	ring_buffer_reset_cpu(buffer, cpu);
1685 
1686 	ring_buffer_record_enable(buffer);
1687 }
1688 
1689 void tracing_reset_online_cpus(struct trace_buffer *buf)
1690 {
1691 	struct ring_buffer *buffer = buf->buffer;
1692 	int cpu;
1693 
1694 	if (!buffer)
1695 		return;
1696 
1697 	ring_buffer_record_disable(buffer);
1698 
1699 	/* Make sure all commits have finished */
1700 	synchronize_sched();
1701 
1702 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1703 
1704 	for_each_online_cpu(cpu)
1705 		ring_buffer_reset_cpu(buffer, cpu);
1706 
1707 	ring_buffer_record_enable(buffer);
1708 }
1709 
1710 /* Must have trace_types_lock held */
1711 void tracing_reset_all_online_cpus(void)
1712 {
1713 	struct trace_array *tr;
1714 
1715 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1716 		if (!tr->clear_trace)
1717 			continue;
1718 		tr->clear_trace = false;
1719 		tracing_reset_online_cpus(&tr->trace_buffer);
1720 #ifdef CONFIG_TRACER_MAX_TRACE
1721 		tracing_reset_online_cpus(&tr->max_buffer);
1722 #endif
1723 	}
1724 }
1725 
1726 static int *tgid_map;
1727 
1728 #define SAVED_CMDLINES_DEFAULT 128
1729 #define NO_CMDLINE_MAP UINT_MAX
1730 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1731 struct saved_cmdlines_buffer {
1732 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1733 	unsigned *map_cmdline_to_pid;
1734 	unsigned cmdline_num;
1735 	int cmdline_idx;
1736 	char *saved_cmdlines;
1737 };
1738 static struct saved_cmdlines_buffer *savedcmd;
1739 
1740 /* temporary disable recording */
1741 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1742 
1743 static inline char *get_saved_cmdlines(int idx)
1744 {
1745 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1746 }
1747 
1748 static inline void set_cmdline(int idx, const char *cmdline)
1749 {
1750 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1751 }
1752 
1753 static int allocate_cmdlines_buffer(unsigned int val,
1754 				    struct saved_cmdlines_buffer *s)
1755 {
1756 	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1757 					GFP_KERNEL);
1758 	if (!s->map_cmdline_to_pid)
1759 		return -ENOMEM;
1760 
1761 	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1762 	if (!s->saved_cmdlines) {
1763 		kfree(s->map_cmdline_to_pid);
1764 		return -ENOMEM;
1765 	}
1766 
1767 	s->cmdline_idx = 0;
1768 	s->cmdline_num = val;
1769 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1770 	       sizeof(s->map_pid_to_cmdline));
1771 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1772 	       val * sizeof(*s->map_cmdline_to_pid));
1773 
1774 	return 0;
1775 }
1776 
1777 static int trace_create_savedcmd(void)
1778 {
1779 	int ret;
1780 
1781 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1782 	if (!savedcmd)
1783 		return -ENOMEM;
1784 
1785 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1786 	if (ret < 0) {
1787 		kfree(savedcmd);
1788 		savedcmd = NULL;
1789 		return -ENOMEM;
1790 	}
1791 
1792 	return 0;
1793 }
1794 
1795 int is_tracing_stopped(void)
1796 {
1797 	return global_trace.stop_count;
1798 }
1799 
1800 /**
1801  * tracing_start - quick start of the tracer
1802  *
1803  * If tracing is enabled but was stopped by tracing_stop,
1804  * this will start the tracer back up.
1805  */
1806 void tracing_start(void)
1807 {
1808 	struct ring_buffer *buffer;
1809 	unsigned long flags;
1810 
1811 	if (tracing_disabled)
1812 		return;
1813 
1814 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1815 	if (--global_trace.stop_count) {
1816 		if (global_trace.stop_count < 0) {
1817 			/* Someone screwed up their debugging */
1818 			WARN_ON_ONCE(1);
1819 			global_trace.stop_count = 0;
1820 		}
1821 		goto out;
1822 	}
1823 
1824 	/* Prevent the buffers from switching */
1825 	arch_spin_lock(&global_trace.max_lock);
1826 
1827 	buffer = global_trace.trace_buffer.buffer;
1828 	if (buffer)
1829 		ring_buffer_record_enable(buffer);
1830 
1831 #ifdef CONFIG_TRACER_MAX_TRACE
1832 	buffer = global_trace.max_buffer.buffer;
1833 	if (buffer)
1834 		ring_buffer_record_enable(buffer);
1835 #endif
1836 
1837 	arch_spin_unlock(&global_trace.max_lock);
1838 
1839  out:
1840 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1841 }
1842 
1843 static void tracing_start_tr(struct trace_array *tr)
1844 {
1845 	struct ring_buffer *buffer;
1846 	unsigned long flags;
1847 
1848 	if (tracing_disabled)
1849 		return;
1850 
1851 	/* If global, we need to also start the max tracer */
1852 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1853 		return tracing_start();
1854 
1855 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1856 
1857 	if (--tr->stop_count) {
1858 		if (tr->stop_count < 0) {
1859 			/* Someone screwed up their debugging */
1860 			WARN_ON_ONCE(1);
1861 			tr->stop_count = 0;
1862 		}
1863 		goto out;
1864 	}
1865 
1866 	buffer = tr->trace_buffer.buffer;
1867 	if (buffer)
1868 		ring_buffer_record_enable(buffer);
1869 
1870  out:
1871 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1872 }
1873 
1874 /**
1875  * tracing_stop - quick stop of the tracer
1876  *
1877  * Light weight way to stop tracing. Use in conjunction with
1878  * tracing_start.
1879  */
1880 void tracing_stop(void)
1881 {
1882 	struct ring_buffer *buffer;
1883 	unsigned long flags;
1884 
1885 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1886 	if (global_trace.stop_count++)
1887 		goto out;
1888 
1889 	/* Prevent the buffers from switching */
1890 	arch_spin_lock(&global_trace.max_lock);
1891 
1892 	buffer = global_trace.trace_buffer.buffer;
1893 	if (buffer)
1894 		ring_buffer_record_disable(buffer);
1895 
1896 #ifdef CONFIG_TRACER_MAX_TRACE
1897 	buffer = global_trace.max_buffer.buffer;
1898 	if (buffer)
1899 		ring_buffer_record_disable(buffer);
1900 #endif
1901 
1902 	arch_spin_unlock(&global_trace.max_lock);
1903 
1904  out:
1905 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1906 }
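/*
 * Illustrative pairing (editor's sketch, not from the original source):
 * in-kernel debug code can freeze the ring buffers around an interesting
 * window and resume afterwards.  Because stop_count is a counter, nested
 * stop/start pairs balance out:
 *
 *	tracing_stop();
 *	inspect_frozen_buffers();	(hypothetical helper)
 *	tracing_start();
 */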
1907 
1908 static void tracing_stop_tr(struct trace_array *tr)
1909 {
1910 	struct ring_buffer *buffer;
1911 	unsigned long flags;
1912 
1913 	/* If global, we need to also stop the max tracer */
1914 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1915 		return tracing_stop();
1916 
1917 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1918 	if (tr->stop_count++)
1919 		goto out;
1920 
1921 	buffer = tr->trace_buffer.buffer;
1922 	if (buffer)
1923 		ring_buffer_record_disable(buffer);
1924 
1925  out:
1926 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1927 }
1928 
1929 static int trace_save_cmdline(struct task_struct *tsk)
1930 {
1931 	unsigned pid, idx;
1932 
1933 	/* treat recording of idle task as a success */
1934 	if (!tsk->pid)
1935 		return 1;
1936 
1937 	if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1938 		return 0;
1939 
1940 	/*
1941 	 * It's not the end of the world if we don't get
1942 	 * the lock, but we also don't want to spin
1943 	 * nor do we want to disable interrupts,
1944 	 * so if we miss here, then better luck next time.
1945 	 */
1946 	if (!arch_spin_trylock(&trace_cmdline_lock))
1947 		return 0;
1948 
1949 	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1950 	if (idx == NO_CMDLINE_MAP) {
1951 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1952 
1953 		/*
1954 		 * Check whether the cmdline buffer at idx has a pid
1955 		 * mapped. We are going to overwrite that entry so we
1956 		 * need to clear the map_pid_to_cmdline. Otherwise we
1957 		 * would read the new comm for the old pid.
1958 		 */
1959 		pid = savedcmd->map_cmdline_to_pid[idx];
1960 		if (pid != NO_CMDLINE_MAP)
1961 			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1962 
1963 		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1964 		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1965 
1966 		savedcmd->cmdline_idx = idx;
1967 	}
1968 
1969 	set_cmdline(idx, tsk->comm);
1970 
1971 	arch_spin_unlock(&trace_cmdline_lock);
1972 
1973 	return 1;
1974 }
1975 
1976 static void __trace_find_cmdline(int pid, char comm[])
1977 {
1978 	unsigned map;
1979 
1980 	if (!pid) {
1981 		strcpy(comm, "<idle>");
1982 		return;
1983 	}
1984 
1985 	if (WARN_ON_ONCE(pid < 0)) {
1986 		strcpy(comm, "<XXX>");
1987 		return;
1988 	}
1989 
1990 	if (pid > PID_MAX_DEFAULT) {
1991 		strcpy(comm, "<...>");
1992 		return;
1993 	}
1994 
1995 	map = savedcmd->map_pid_to_cmdline[pid];
1996 	if (map != NO_CMDLINE_MAP)
1997 		strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1998 	else
1999 		strcpy(comm, "<...>");
2000 }
2001 
2002 void trace_find_cmdline(int pid, char comm[])
2003 {
2004 	preempt_disable();
2005 	arch_spin_lock(&trace_cmdline_lock);
2006 
2007 	__trace_find_cmdline(pid, comm);
2008 
2009 	arch_spin_unlock(&trace_cmdline_lock);
2010 	preempt_enable();
2011 }
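/*
 * Illustrative use (editor's sketch): output code resolves a recorded
 * pid back to a command name into a caller-supplied buffer:
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%d", comm, entry->pid);
 *
 * This mirrors what the trace output code does when printing the
 * TASK-PID column; the exact format string here is only illustrative.
 */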
2012 
2013 int trace_find_tgid(int pid)
2014 {
2015 	if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2016 		return 0;
2017 
2018 	return tgid_map[pid];
2019 }
2020 
2021 static int trace_save_tgid(struct task_struct *tsk)
2022 {
2023 	/* treat recording of idle task as a success */
2024 	if (!tsk->pid)
2025 		return 1;
2026 
2027 	if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2028 		return 0;
2029 
2030 	tgid_map[tsk->pid] = tsk->tgid;
2031 	return 1;
2032 }
2033 
2034 static bool tracing_record_taskinfo_skip(int flags)
2035 {
2036 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2037 		return true;
2038 	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2039 		return true;
2040 	if (!__this_cpu_read(trace_taskinfo_save))
2041 		return true;
2042 	return false;
2043 }
2044 
2045 /**
2046  * tracing_record_taskinfo - record the task info of a task
2047  *
2048  * @task:  task to record
2049  * @flags: TRACE_RECORD_CMDLINE for recording comm
2050  *         TRACE_RECORD_TGID for recording tgid
2051  */
2052 void tracing_record_taskinfo(struct task_struct *task, int flags)
2053 {
2054 	bool done;
2055 
2056 	if (tracing_record_taskinfo_skip(flags))
2057 		return;
2058 
2059 	/*
2060 	 * Record as much task information as possible. If some fail, continue
2061 	 * to try to record the others.
2062 	 */
2063 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2064 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2065 
2066 	/* If recording any information failed, retry again soon. */
2067 	if (!done)
2068 		return;
2069 
2070 	__this_cpu_write(trace_taskinfo_save, false);
2071 }
2072 
2073 /**
2074  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2075  *
2076  * @prev:  previous task during sched_switch
2077  * @next:  next task during sched_switch
2078  * @flags: TRACE_RECORD_CMDLINE for recording comm
2079  *         TRACE_RECORD_TGID for recording tgid
2080  */
2081 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2082 					  struct task_struct *next, int flags)
2083 {
2084 	bool done;
2085 
2086 	if (tracing_record_taskinfo_skip(flags))
2087 		return;
2088 
2089 	/*
2090 	 * Record as much task information as possible. If some fail, continue
2091 	 * to try to record the others.
2092 	 */
2093 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2094 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2095 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2096 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2097 
2098 	/* If recording any information failed, retry again soon. */
2099 	if (!done)
2100 		return;
2101 
2102 	__this_cpu_write(trace_taskinfo_save, false);
2103 }
2104 
2105 /* Helpers to record a specific task information */
2106 void tracing_record_cmdline(struct task_struct *task)
2107 {
2108 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2109 }
2110 
2111 void tracing_record_tgid(struct task_struct *task)
2112 {
2113 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2114 }
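/*
 * Editor's note (illustrative, simplified): the scheduler switch probe
 * in kernel/trace/trace_sched_switch.c is the main user of the
 * sched_switch variant above.  When both comm and tgid recording are
 * enabled it passes both flag bits in one call, roughly:
 *
 *	tracing_record_taskinfo_sched_switch(prev, next,
 *			TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID);
 *
 * The exact flag computation in the probe differs (it is derived from
 * reference counters), so treat this as a sketch.
 */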
2115 
2116 /*
2117  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2118  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2119  * simplifies those functions and keeps them in sync.
2120  */
2121 enum print_line_t trace_handle_return(struct trace_seq *s)
2122 {
2123 	return trace_seq_has_overflowed(s) ?
2124 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2125 }
2126 EXPORT_SYMBOL_GPL(trace_handle_return);
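/*
 * Illustrative use (editor's sketch): event print handlers end with this
 * helper so that an overflowing trace_seq is reported consistently:
 *
 *	static enum print_line_t my_event_trace(struct trace_iterator *iter,
 *						int flags,
 *						struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "my_event: ...\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 *
 * "my_event" is a made-up name; real handlers are generated by the
 * TRACE_EVENT() machinery or live in trace_output.c.
 */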
2127 
2128 void
2129 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2130 			     int pc)
2131 {
2132 	struct task_struct *tsk = current;
2133 
2134 	entry->preempt_count		= pc & 0xff;
2135 	entry->pid			= (tsk) ? tsk->pid : 0;
2136 	entry->flags =
2137 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2138 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2139 #else
2140 		TRACE_FLAG_IRQS_NOSUPPORT |
2141 #endif
2142 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2143 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2144 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2145 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2146 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2147 }
2148 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
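/*
 * Illustrative use (editor's sketch): callers capture the irq flags and
 * preempt count at the tracing site and pass them down so the entry
 * records the context it was generated in:
 *
 *	unsigned long irq_flags;
 *	int pc;
 *
 *	local_save_flags(irq_flags);
 *	pc = preempt_count();
 *	tracing_generic_entry_update(&entry->ent, irq_flags, pc);
 *
 * This is the same pattern trace_event_setup() and trace_vbprintk()
 * in this file follow.
 */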
2149 
2150 struct ring_buffer_event *
2151 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2152 			  int type,
2153 			  unsigned long len,
2154 			  unsigned long flags, int pc)
2155 {
2156 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2157 }
2158 
2159 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2160 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2161 static int trace_buffered_event_ref;
2162 
2163 /**
2164  * trace_buffered_event_enable - enable buffering events
2165  *
2166  * When events are being filtered, it is quicker to use a temporary
2167  * buffer to write the event data into if there's a likely chance
2168  * that it will not be committed. The discard of the ring buffer
2169  * is not as fast as committing, and is much slower than copying
2170  * a commit.
2171  *
2172  * When an event is to be filtered, allocate per cpu buffers to
2173  * write the event data into, and if the event is filtered and discarded
2174  * it is simply dropped, otherwise, the entire data is to be committed
2175  * in one shot.
2176  */
2177 void trace_buffered_event_enable(void)
2178 {
2179 	struct ring_buffer_event *event;
2180 	struct page *page;
2181 	int cpu;
2182 
2183 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2184 
2185 	if (trace_buffered_event_ref++)
2186 		return;
2187 
2188 	for_each_tracing_cpu(cpu) {
2189 		page = alloc_pages_node(cpu_to_node(cpu),
2190 					GFP_KERNEL | __GFP_NORETRY, 0);
2191 		if (!page)
2192 			goto failed;
2193 
2194 		event = page_address(page);
2195 		memset(event, 0, sizeof(*event));
2196 
2197 		per_cpu(trace_buffered_event, cpu) = event;
2198 
2199 		preempt_disable();
2200 		if (cpu == smp_processor_id() &&
2201 		    this_cpu_read(trace_buffered_event) !=
2202 		    per_cpu(trace_buffered_event, cpu))
2203 			WARN_ON_ONCE(1);
2204 		preempt_enable();
2205 	}
2206 
2207 	return;
2208  failed:
2209 	trace_buffered_event_disable();
2210 }
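/*
 * Illustrative pairing (editor's sketch): a user such as the event
 * filter code takes a reference while a filter is attached and drops it
 * when the filter goes away, always under event_mutex as the
 * WARN_ON_ONCE() above demands:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	...				(filter in place, events buffered)
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */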
2211 
2212 static void enable_trace_buffered_event(void *data)
2213 {
2214 	/* Probably not needed, but do it anyway */
2215 	smp_rmb();
2216 	this_cpu_dec(trace_buffered_event_cnt);
2217 }
2218 
2219 static void disable_trace_buffered_event(void *data)
2220 {
2221 	this_cpu_inc(trace_buffered_event_cnt);
2222 }
2223 
2224 /**
2225  * trace_buffered_event_disable - disable buffering events
2226  *
2227  * When a filter is removed, it is faster to not use the buffered
2228  * events, and to commit directly into the ring buffer. Free up
2229  * the temp buffers when there are no more users. This requires
2230  * special synchronization with current events.
2231  */
2232 void trace_buffered_event_disable(void)
2233 {
2234 	int cpu;
2235 
2236 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2237 
2238 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2239 		return;
2240 
2241 	if (--trace_buffered_event_ref)
2242 		return;
2243 
2244 	preempt_disable();
2245 	/* For each CPU, set the buffer as used. */
2246 	smp_call_function_many(tracing_buffer_mask,
2247 			       disable_trace_buffered_event, NULL, 1);
2248 	preempt_enable();
2249 
2250 	/* Wait for all current users to finish */
2251 	synchronize_sched();
2252 
2253 	for_each_tracing_cpu(cpu) {
2254 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2255 		per_cpu(trace_buffered_event, cpu) = NULL;
2256 	}
2257 	/*
2258 	 * Make sure trace_buffered_event is NULL before clearing
2259 	 * trace_buffered_event_cnt.
2260 	 */
2261 	smp_wmb();
2262 
2263 	preempt_disable();
2264 	/* Do the work on each cpu */
2265 	smp_call_function_many(tracing_buffer_mask,
2266 			       enable_trace_buffered_event, NULL, 1);
2267 	preempt_enable();
2268 }
2269 
2270 static struct ring_buffer *temp_buffer;
2271 
2272 struct ring_buffer_event *
2273 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2274 			  struct trace_event_file *trace_file,
2275 			  int type, unsigned long len,
2276 			  unsigned long flags, int pc)
2277 {
2278 	struct ring_buffer_event *entry;
2279 	int val;
2280 
2281 	*current_rb = trace_file->tr->trace_buffer.buffer;
2282 
2283 	if ((trace_file->flags &
2284 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2285 	    (entry = this_cpu_read(trace_buffered_event))) {
2286 		/* Try to use the per cpu buffer first */
2287 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2288 		if (val == 1) {
2289 			trace_event_setup(entry, type, flags, pc);
2290 			entry->array[0] = len;
2291 			return entry;
2292 		}
2293 		this_cpu_dec(trace_buffered_event_cnt);
2294 	}
2295 
2296 	entry = __trace_buffer_lock_reserve(*current_rb,
2297 					    type, len, flags, pc);
2298 	/*
2299 	 * If tracing is off, but we have triggers enabled
2300 	 * we still need to look at the event data. Use the temp_buffer
2301 	 * to store the trace event for the trigger to use. It's recursion
2302 	 * safe and will not be recorded anywhere.
2303 	 */
2304 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2305 		*current_rb = temp_buffer;
2306 		entry = __trace_buffer_lock_reserve(*current_rb,
2307 						    type, len, flags, pc);
2308 	}
2309 	return entry;
2310 }
2311 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
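/*
 * Illustrative flow (editor's sketch): the generated trace event code
 * reserves space, fills in the type-specific fields, and then commits,
 * roughly:
 *
 *	fbuffer->event = trace_event_buffer_lock_reserve(&fbuffer->buffer,
 *				trace_file, event_call->event.type,
 *				sizeof(*entry) + data_size,
 *				irq_flags, pc);
 *	if (!fbuffer->event)
 *		return;
 *	entry = ring_buffer_event_data(fbuffer->event);
 *	... fill in entry ...
 *	trace_event_buffer_commit(fbuffer);
 *
 * The actual wrapper is trace_event_buffer_reserve() in
 * kernel/trace/trace_events.c; field names above follow the fbuffer
 * usage visible in trace_event_buffer_commit() below.
 */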
2312 
2313 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2314 static DEFINE_MUTEX(tracepoint_printk_mutex);
2315 
2316 static void output_printk(struct trace_event_buffer *fbuffer)
2317 {
2318 	struct trace_event_call *event_call;
2319 	struct trace_event *event;
2320 	unsigned long flags;
2321 	struct trace_iterator *iter = tracepoint_print_iter;
2322 
2323 	/* We should never get here if iter is NULL */
2324 	if (WARN_ON_ONCE(!iter))
2325 		return;
2326 
2327 	event_call = fbuffer->trace_file->event_call;
2328 	if (!event_call || !event_call->event.funcs ||
2329 	    !event_call->event.funcs->trace)
2330 		return;
2331 
2332 	event = &fbuffer->trace_file->event_call->event;
2333 
2334 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2335 	trace_seq_init(&iter->seq);
2336 	iter->ent = fbuffer->entry;
2337 	event_call->event.funcs->trace(iter, 0, event);
2338 	trace_seq_putc(&iter->seq, 0);
2339 	printk("%s", iter->seq.buffer);
2340 
2341 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2342 }
2343 
2344 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2345 			     void __user *buffer, size_t *lenp,
2346 			     loff_t *ppos)
2347 {
2348 	int save_tracepoint_printk;
2349 	int ret;
2350 
2351 	mutex_lock(&tracepoint_printk_mutex);
2352 	save_tracepoint_printk = tracepoint_printk;
2353 
2354 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2355 
2356 	/*
2357 	 * This will force exiting early, as tracepoint_printk
2358 	 * is always zero when tracepoint_print_iter is not allocated
2359 	 */
2360 	if (!tracepoint_print_iter)
2361 		tracepoint_printk = 0;
2362 
2363 	if (save_tracepoint_printk == tracepoint_printk)
2364 		goto out;
2365 
2366 	if (tracepoint_printk)
2367 		static_key_enable(&tracepoint_printk_key.key);
2368 	else
2369 		static_key_disable(&tracepoint_printk_key.key);
2370 
2371  out:
2372 	mutex_unlock(&tracepoint_printk_mutex);
2373 
2374 	return ret;
2375 }
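/*
 * Editor's note (illustrative): this handler backs the
 * /proc/sys/kernel/tracepoint_printk sysctl, normally toggled with
 * something like:
 *
 *	echo 1 > /proc/sys/kernel/tracepoint_printk
 *
 * The "tp_printk" boot parameter allocates tracepoint_print_iter early
 * so the knob has an effect; without it the write falls back to zero,
 * as the comment above explains.
 */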
2376 
2377 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2378 {
2379 	if (static_key_false(&tracepoint_printk_key.key))
2380 		output_printk(fbuffer);
2381 
2382 	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2383 				    fbuffer->event, fbuffer->entry,
2384 				    fbuffer->flags, fbuffer->pc);
2385 }
2386 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2387 
2388 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2389 				     struct ring_buffer *buffer,
2390 				     struct ring_buffer_event *event,
2391 				     unsigned long flags, int pc,
2392 				     struct pt_regs *regs)
2393 {
2394 	__buffer_unlock_commit(buffer, event);
2395 
2396 	/*
2397 	 * If regs is not set, then skip the following callers:
2398 	 *   trace_buffer_unlock_commit_regs
2399 	 *   event_trigger_unlock_commit
2400 	 *   trace_event_buffer_commit
2401 	 *   trace_event_raw_event_sched_switch
2402 	 * Note, we can still get here via blktrace, wakeup tracer
2403 	 * and mmiotrace, but that's ok if they lose a function or
2404 	 * two. They are not that meaningful.
2405 	 */
2406 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2407 	ftrace_trace_userstack(buffer, flags, pc);
2408 }
2409 
2410 /*
2411  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2412  */
2413 void
2414 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2415 				   struct ring_buffer_event *event)
2416 {
2417 	__buffer_unlock_commit(buffer, event);
2418 }
2419 
2420 static void
2421 trace_process_export(struct trace_export *export,
2422 	       struct ring_buffer_event *event)
2423 {
2424 	struct trace_entry *entry;
2425 	unsigned int size = 0;
2426 
2427 	entry = ring_buffer_event_data(event);
2428 	size = ring_buffer_event_length(event);
2429 	export->write(entry, size);
2430 }
2431 
2432 static DEFINE_MUTEX(ftrace_export_lock);
2433 
2434 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2435 
2436 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2437 
2438 static inline void ftrace_exports_enable(void)
2439 {
2440 	static_branch_enable(&ftrace_exports_enabled);
2441 }
2442 
2443 static inline void ftrace_exports_disable(void)
2444 {
2445 	static_branch_disable(&ftrace_exports_enabled);
2446 }
2447 
2448 void ftrace_exports(struct ring_buffer_event *event)
2449 {
2450 	struct trace_export *export;
2451 
2452 	preempt_disable_notrace();
2453 
2454 	export = rcu_dereference_raw_notrace(ftrace_exports_list);
2455 	while (export) {
2456 		trace_process_export(export, event);
2457 		export = rcu_dereference_raw_notrace(export->next);
2458 	}
2459 
2460 	preempt_enable_notrace();
2461 }
2462 
2463 static inline void
2464 add_trace_export(struct trace_export **list, struct trace_export *export)
2465 {
2466 	rcu_assign_pointer(export->next, *list);
2467 	/*
2468 	 * We are entering export into the list but another
2469 	 * CPU might be walking that list. We need to make sure
2470 	 * the export->next pointer is valid before another CPU sees
2471 	 * the export pointer included into the list.
2472 	 */
2473 	rcu_assign_pointer(*list, export);
2474 }
2475 
2476 static inline int
2477 rm_trace_export(struct trace_export **list, struct trace_export *export)
2478 {
2479 	struct trace_export **p;
2480 
2481 	for (p = list; *p != NULL; p = &(*p)->next)
2482 		if (*p == export)
2483 			break;
2484 
2485 	if (*p != export)
2486 		return -1;
2487 
2488 	rcu_assign_pointer(*p, (*p)->next);
2489 
2490 	return 0;
2491 }
2492 
2493 static inline void
2494 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2495 {
2496 	if (*list == NULL)
2497 		ftrace_exports_enable();
2498 
2499 	add_trace_export(list, export);
2500 }
2501 
2502 static inline int
2503 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2504 {
2505 	int ret;
2506 
2507 	ret = rm_trace_export(list, export);
2508 	if (*list == NULL)
2509 		ftrace_exports_disable();
2510 
2511 	return ret;
2512 }
2513 
2514 int register_ftrace_export(struct trace_export *export)
2515 {
2516 	if (WARN_ON_ONCE(!export->write))
2517 		return -1;
2518 
2519 	mutex_lock(&ftrace_export_lock);
2520 
2521 	add_ftrace_export(&ftrace_exports_list, export);
2522 
2523 	mutex_unlock(&ftrace_export_lock);
2524 
2525 	return 0;
2526 }
2527 EXPORT_SYMBOL_GPL(register_ftrace_export);
2528 
2529 int unregister_ftrace_export(struct trace_export *export)
2530 {
2531 	int ret;
2532 
2533 	mutex_lock(&ftrace_export_lock);
2534 
2535 	ret = rm_ftrace_export(&ftrace_exports_list, export);
2536 
2537 	mutex_unlock(&ftrace_export_lock);
2538 
2539 	return ret;
2540 }
2541 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
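/*
 * Illustrative sketch (editor's addition): a minimal exporter mirrors
 * every function-trace event to some out-of-band consumer.  The write()
 * callback receives the raw entry and its length, matching the
 * export->write(entry, size) call in trace_process_export() above:
 *
 *	static void my_export_write(const void *data, unsigned int len)
 *	{
 *		... push the entry to a hypothetical backend ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 *
 * "my_export" is a made-up name; the STM ftrace bridge is an in-tree
 * user of this interface.
 */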
2542 
2543 void
2544 trace_function(struct trace_array *tr,
2545 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2546 	       int pc)
2547 {
2548 	struct trace_event_call *call = &event_function;
2549 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2550 	struct ring_buffer_event *event;
2551 	struct ftrace_entry *entry;
2552 
2553 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2554 					    flags, pc);
2555 	if (!event)
2556 		return;
2557 	entry	= ring_buffer_event_data(event);
2558 	entry->ip			= ip;
2559 	entry->parent_ip		= parent_ip;
2560 
2561 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2562 		if (static_branch_unlikely(&ftrace_exports_enabled))
2563 			ftrace_exports(event);
2564 		__buffer_unlock_commit(buffer, event);
2565 	}
2566 }
2567 
2568 #ifdef CONFIG_STACKTRACE
2569 
2570 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2571 struct ftrace_stack {
2572 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
2573 };
2574 
2575 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2576 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2577 
2578 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2579 				 unsigned long flags,
2580 				 int skip, int pc, struct pt_regs *regs)
2581 {
2582 	struct trace_event_call *call = &event_kernel_stack;
2583 	struct ring_buffer_event *event;
2584 	struct stack_entry *entry;
2585 	struct stack_trace trace;
2586 	int use_stack;
2587 	int size = FTRACE_STACK_ENTRIES;
2588 
2589 	trace.nr_entries	= 0;
2590 	trace.skip		= skip;
2591 
2592 	/*
2593 	 * Add two, for this function and the call to save_stack_trace()
2594 	 * If regs is set, then these functions will not be in the way.
2595 	 */
2596 	if (!regs)
2597 		trace.skip += 2;
2598 
2599 	/*
2600 	 * Since events can happen in NMIs there's no safe way to
2601 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2602 	 * or NMI comes in, it will just have to use the default
2603 	 * FTRACE_STACK_SIZE.
2604 	 */
2605 	preempt_disable_notrace();
2606 
2607 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2608 	/*
2609 	 * We don't need any atomic variables, just a barrier.
2610 	 * If an interrupt comes in, we don't care, because it would
2611 	 * have exited and put the counter back to what we want.
2612 	 * We just need a barrier to keep gcc from moving things
2613 	 * around.
2614 	 */
2615 	barrier();
2616 	if (use_stack == 1) {
2617 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
2618 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
2619 
2620 		if (regs)
2621 			save_stack_trace_regs(regs, &trace);
2622 		else
2623 			save_stack_trace(&trace);
2624 
2625 		if (trace.nr_entries > size)
2626 			size = trace.nr_entries;
2627 	} else
2628 		/* From now on, use_stack is a boolean */
2629 		use_stack = 0;
2630 
2631 	size *= sizeof(unsigned long);
2632 
2633 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2634 					    sizeof(*entry) + size, flags, pc);
2635 	if (!event)
2636 		goto out;
2637 	entry = ring_buffer_event_data(event);
2638 
2639 	memset(&entry->caller, 0, size);
2640 
2641 	if (use_stack)
2642 		memcpy(&entry->caller, trace.entries,
2643 		       trace.nr_entries * sizeof(unsigned long));
2644 	else {
2645 		trace.max_entries	= FTRACE_STACK_ENTRIES;
2646 		trace.entries		= entry->caller;
2647 		if (regs)
2648 			save_stack_trace_regs(regs, &trace);
2649 		else
2650 			save_stack_trace(&trace);
2651 	}
2652 
2653 	entry->size = trace.nr_entries;
2654 
2655 	if (!call_filter_check_discard(call, entry, buffer, event))
2656 		__buffer_unlock_commit(buffer, event);
2657 
2658  out:
2659 	/* Again, don't let gcc optimize things here */
2660 	barrier();
2661 	__this_cpu_dec(ftrace_stack_reserve);
2662 	preempt_enable_notrace();
2663 
2664 }
2665 
2666 static inline void ftrace_trace_stack(struct trace_array *tr,
2667 				      struct ring_buffer *buffer,
2668 				      unsigned long flags,
2669 				      int skip, int pc, struct pt_regs *regs)
2670 {
2671 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2672 		return;
2673 
2674 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
2675 }
2676 
2677 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2678 		   int pc)
2679 {
2680 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
2681 
2682 	if (rcu_is_watching()) {
2683 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2684 		return;
2685 	}
2686 
2687 	/*
2688 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2689 	 * but if the above rcu_is_watching() failed, then the NMI
2690 	 * triggered someplace critical, and rcu_irq_enter() should
2691 	 * not be called from NMI.
2692 	 */
2693 	if (unlikely(in_nmi()))
2694 		return;
2695 
2696 	/*
2697 	 * It is possible that a function is being traced in a
2698 	 * location that RCU is not watching. A call to
2699 	 * rcu_irq_enter() will make sure that it is, but there's
2700 	 * a few internal rcu functions that could be traced
2701 	 * where that won't work either. In those cases, we just
2702 	 * do nothing.
2703 	 */
2704 	if (unlikely(rcu_irq_enter_disabled()))
2705 		return;
2706 
2707 	rcu_irq_enter_irqson();
2708 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2709 	rcu_irq_exit_irqson();
2710 }
2711 
2712 /**
2713  * trace_dump_stack - record a stack back trace in the trace buffer
2714  * @skip: Number of functions to skip (helper handlers)
2715  */
2716 void trace_dump_stack(int skip)
2717 {
2718 	unsigned long flags;
2719 
2720 	if (tracing_disabled || tracing_selftest_running)
2721 		return;
2722 
2723 	local_save_flags(flags);
2724 
2725 	/*
2726 	 * Skip 3 more, seems to get us at the caller of
2727 	 * this function.
2728 	 */
2729 	skip += 3;
2730 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
2731 			     flags, skip, preempt_count(), NULL);
2732 }
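/*
 * Illustrative use (editor's sketch): sprinkle this in code under
 * investigation to record who called into a path, without the overhead
 * of printing to the console:
 *
 *	trace_dump_stack(0);
 *
 * The backtrace shows up as a stacktrace entry in the regular trace
 * output (e.g. the tracefs "trace" file).
 */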
2733 
2734 static DEFINE_PER_CPU(int, user_stack_count);
2735 
2736 void
2737 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2738 {
2739 	struct trace_event_call *call = &event_user_stack;
2740 	struct ring_buffer_event *event;
2741 	struct userstack_entry *entry;
2742 	struct stack_trace trace;
2743 
2744 	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2745 		return;
2746 
2747 	/*
2748 	 * NMIs cannot handle page faults, even with fixups.
2749 	 * Saving the user stack can (and often does) fault.
2750 	 */
2751 	if (unlikely(in_nmi()))
2752 		return;
2753 
2754 	/*
2755 	 * prevent recursion, since the user stack tracing may
2756 	 * trigger other kernel events.
2757 	 */
2758 	preempt_disable();
2759 	if (__this_cpu_read(user_stack_count))
2760 		goto out;
2761 
2762 	__this_cpu_inc(user_stack_count);
2763 
2764 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2765 					    sizeof(*entry), flags, pc);
2766 	if (!event)
2767 		goto out_drop_count;
2768 	entry	= ring_buffer_event_data(event);
2769 
2770 	entry->tgid		= current->tgid;
2771 	memset(&entry->caller, 0, sizeof(entry->caller));
2772 
2773 	trace.nr_entries	= 0;
2774 	trace.max_entries	= FTRACE_STACK_ENTRIES;
2775 	trace.skip		= 0;
2776 	trace.entries		= entry->caller;
2777 
2778 	save_stack_trace_user(&trace);
2779 	if (!call_filter_check_discard(call, entry, buffer, event))
2780 		__buffer_unlock_commit(buffer, event);
2781 
2782  out_drop_count:
2783 	__this_cpu_dec(user_stack_count);
2784  out:
2785 	preempt_enable();
2786 }
2787 
2788 #ifdef UNUSED
2789 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2790 {
2791 	ftrace_trace_userstack(tr, flags, preempt_count());
2792 }
2793 #endif /* UNUSED */
2794 
2795 #endif /* CONFIG_STACKTRACE */
2796 
2797 /* created for use with alloc_percpu */
2798 struct trace_buffer_struct {
2799 	int nesting;
2800 	char buffer[4][TRACE_BUF_SIZE];
2801 };
2802 
2803 static struct trace_buffer_struct *trace_percpu_buffer;
2804 
2805 /*
2806  * This allows for lockless recording.  If we're nested too deeply, then
2807  * this returns NULL.
2808  */
2809 static char *get_trace_buf(void)
2810 {
2811 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2812 
2813 	if (!buffer || buffer->nesting >= 4)
2814 		return NULL;
2815 
2816 	buffer->nesting++;
2817 
2818 	/* Interrupts must see nesting incremented before we use the buffer */
2819 	barrier();
2820 	return &buffer->buffer[buffer->nesting][0];
2821 }
2822 
2823 static void put_trace_buf(void)
2824 {
2825 	/* Don't let the decrement of nesting leak before this */
2826 	barrier();
2827 	this_cpu_dec(trace_percpu_buffer->nesting);
2828 }
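/*
 * Illustrative pattern (editor's sketch): every user of the per-cpu
 * trace_printk buffers brackets its use the same way trace_vbprintk()
 * below does, so up to four nested contexts (task, softirq, irq, NMI)
 * each get their own slot:
 *
 *	buf = get_trace_buf();
 *	if (!buf)
 *		goto out_nobuffer;	(nested too deep, drop the print)
 *	... format into buf ...
 *	put_trace_buf();
 */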
2829 
2830 static int alloc_percpu_trace_buffer(void)
2831 {
2832 	struct trace_buffer_struct *buffers;
2833 
2834 	buffers = alloc_percpu(struct trace_buffer_struct);
2835 	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2836 		return -ENOMEM;
2837 
2838 	trace_percpu_buffer = buffers;
2839 	return 0;
2840 }
2841 
2842 static int buffers_allocated;
2843 
2844 void trace_printk_init_buffers(void)
2845 {
2846 	if (buffers_allocated)
2847 		return;
2848 
2849 	if (alloc_percpu_trace_buffer())
2850 		return;
2851 
2852 	/* trace_printk() is for debug use only. Don't use it in production. */
2853 
2854 	pr_warn("\n");
2855 	pr_warn("**********************************************************\n");
2856 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2857 	pr_warn("**                                                      **\n");
2858 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2859 	pr_warn("**                                                      **\n");
2860 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2861 	pr_warn("** unsafe for production use.                           **\n");
2862 	pr_warn("**                                                      **\n");
2863 	pr_warn("** If you see this message and you are not debugging    **\n");
2864 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2865 	pr_warn("**                                                      **\n");
2866 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2867 	pr_warn("**********************************************************\n");
2868 
2869 	/* Expand the buffers to set size */
2870 	tracing_update_buffers();
2871 
2872 	buffers_allocated = 1;
2873 
2874 	/*
2875 	 * trace_printk_init_buffers() can be called by modules.
2876 	 * If that happens, then we need to start cmdline recording
2877 	 * directly here. If the global_trace.buffer is already
2878 	 * allocated here, then this was called by module code.
2879 	 */
2880 	if (global_trace.trace_buffer.buffer)
2881 		tracing_start_cmdline_record();
2882 }
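/*
 * Illustrative use (editor's sketch): a developer drops temporary
 * markers into code being debugged; the first such call pulls in this
 * init path and triggers the banner above:
 *
 *	trace_printk("got here: x=%d\n", x);
 *
 * The output lands in the ring buffer (tracefs "trace"), not in dmesg,
 * which is why it is cheap enough for hot paths but debug-only.
 */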
2883 
2884 void trace_printk_start_comm(void)
2885 {
2886 	/* Start tracing comms if trace printk is set */
2887 	if (!buffers_allocated)
2888 		return;
2889 	tracing_start_cmdline_record();
2890 }
2891 
2892 static void trace_printk_start_stop_comm(int enabled)
2893 {
2894 	if (!buffers_allocated)
2895 		return;
2896 
2897 	if (enabled)
2898 		tracing_start_cmdline_record();
2899 	else
2900 		tracing_stop_cmdline_record();
2901 }
2902 
2903 /**
2904  * trace_vbprintk - write binary msg to tracing buffer
2905  *
2906  */
2907 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2908 {
2909 	struct trace_event_call *call = &event_bprint;
2910 	struct ring_buffer_event *event;
2911 	struct ring_buffer *buffer;
2912 	struct trace_array *tr = &global_trace;
2913 	struct bprint_entry *entry;
2914 	unsigned long flags;
2915 	char *tbuffer;
2916 	int len = 0, size, pc;
2917 
2918 	if (unlikely(tracing_selftest_running || tracing_disabled))
2919 		return 0;
2920 
2921 	/* Don't pollute graph traces with trace_vprintk internals */
2922 	pause_graph_tracing();
2923 
2924 	pc = preempt_count();
2925 	preempt_disable_notrace();
2926 
2927 	tbuffer = get_trace_buf();
2928 	if (!tbuffer) {
2929 		len = 0;
2930 		goto out_nobuffer;
2931 	}
2932 
2933 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2934 
2935 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2936 		goto out;
2937 
2938 	local_save_flags(flags);
2939 	size = sizeof(*entry) + sizeof(u32) * len;
2940 	buffer = tr->trace_buffer.buffer;
2941 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2942 					    flags, pc);
2943 	if (!event)
2944 		goto out;
2945 	entry = ring_buffer_event_data(event);
2946 	entry->ip			= ip;
2947 	entry->fmt			= fmt;
2948 
2949 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2950 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2951 		__buffer_unlock_commit(buffer, event);
2952 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2953 	}
2954 
2955 out:
2956 	put_trace_buf();
2957 
2958 out_nobuffer:
2959 	preempt_enable_notrace();
2960 	unpause_graph_tracing();
2961 
2962 	return len;
2963 }
2964 EXPORT_SYMBOL_GPL(trace_vbprintk);
2965 
2966 __printf(3, 0)
2967 static int
2968 __trace_array_vprintk(struct ring_buffer *buffer,
2969 		      unsigned long ip, const char *fmt, va_list args)
2970 {
2971 	struct trace_event_call *call = &event_print;
2972 	struct ring_buffer_event *event;
2973 	int len = 0, size, pc;
2974 	struct print_entry *entry;
2975 	unsigned long flags;
2976 	char *tbuffer;
2977 
2978 	if (tracing_disabled || tracing_selftest_running)
2979 		return 0;
2980 
2981 	/* Don't pollute graph traces with trace_vprintk internals */
2982 	pause_graph_tracing();
2983 
2984 	pc = preempt_count();
2985 	preempt_disable_notrace();
2986 
2987 
2988 	tbuffer = get_trace_buf();
2989 	if (!tbuffer) {
2990 		len = 0;
2991 		goto out_nobuffer;
2992 	}
2993 
2994 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2995 
2996 	local_save_flags(flags);
2997 	size = sizeof(*entry) + len + 1;
2998 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2999 					    flags, pc);
3000 	if (!event)
3001 		goto out;
3002 	entry = ring_buffer_event_data(event);
3003 	entry->ip = ip;
3004 
3005 	memcpy(&entry->buf, tbuffer, len + 1);
3006 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3007 		__buffer_unlock_commit(buffer, event);
3008 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3009 	}
3010 
3011 out:
3012 	put_trace_buf();
3013 
3014 out_nobuffer:
3015 	preempt_enable_notrace();
3016 	unpause_graph_tracing();
3017 
3018 	return len;
3019 }
3020 
3021 __printf(3, 0)
3022 int trace_array_vprintk(struct trace_array *tr,
3023 			unsigned long ip, const char *fmt, va_list args)
3024 {
3025 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3026 }
3027 
3028 __printf(3, 0)
3029 int trace_array_printk(struct trace_array *tr,
3030 		       unsigned long ip, const char *fmt, ...)
3031 {
3032 	int ret;
3033 	va_list ap;
3034 
3035 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3036 		return 0;
3037 
3038 	va_start(ap, fmt);
3039 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3040 	va_end(ap);
3041 	return ret;
3042 }
3043 
3044 __printf(3, 4)
3045 int trace_array_printk_buf(struct ring_buffer *buffer,
3046 			   unsigned long ip, const char *fmt, ...)
3047 {
3048 	int ret;
3049 	va_list ap;
3050 
3051 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3052 		return 0;
3053 
3054 	va_start(ap, fmt);
3055 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3056 	va_end(ap);
3057 	return ret;
3058 }
3059 
3060 __printf(2, 0)
3061 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3062 {
3063 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3064 }
3065 EXPORT_SYMBOL_GPL(trace_vprintk);
3066 
3067 static void trace_iterator_increment(struct trace_iterator *iter)
3068 {
3069 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3070 
3071 	iter->idx++;
3072 	if (buf_iter)
3073 		ring_buffer_read(buf_iter, NULL);
3074 }
3075 
3076 static struct trace_entry *
3077 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3078 		unsigned long *lost_events)
3079 {
3080 	struct ring_buffer_event *event;
3081 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3082 
3083 	if (buf_iter)
3084 		event = ring_buffer_iter_peek(buf_iter, ts);
3085 	else
3086 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3087 					 lost_events);
3088 
3089 	if (event) {
3090 		iter->ent_size = ring_buffer_event_length(event);
3091 		return ring_buffer_event_data(event);
3092 	}
3093 	iter->ent_size = 0;
3094 	return NULL;
3095 }
3096 
3097 static struct trace_entry *
3098 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3099 		  unsigned long *missing_events, u64 *ent_ts)
3100 {
3101 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
3102 	struct trace_entry *ent, *next = NULL;
3103 	unsigned long lost_events = 0, next_lost = 0;
3104 	int cpu_file = iter->cpu_file;
3105 	u64 next_ts = 0, ts;
3106 	int next_cpu = -1;
3107 	int next_size = 0;
3108 	int cpu;
3109 
3110 	/*
3111 	 * If we are in a per_cpu trace file, don't bother iterating over
3112 	 * all CPUs; just peek at that one directly.
3113 	 */
3114 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3115 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3116 			return NULL;
3117 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3118 		if (ent_cpu)
3119 			*ent_cpu = cpu_file;
3120 
3121 		return ent;
3122 	}
3123 
3124 	for_each_tracing_cpu(cpu) {
3125 
3126 		if (ring_buffer_empty_cpu(buffer, cpu))
3127 			continue;
3128 
3129 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3130 
3131 		/*
3132 		 * Pick the entry with the smallest timestamp:
3133 		 */
3134 		if (ent && (!next || ts < next_ts)) {
3135 			next = ent;
3136 			next_cpu = cpu;
3137 			next_ts = ts;
3138 			next_lost = lost_events;
3139 			next_size = iter->ent_size;
3140 		}
3141 	}
3142 
3143 	iter->ent_size = next_size;
3144 
3145 	if (ent_cpu)
3146 		*ent_cpu = next_cpu;
3147 
3148 	if (ent_ts)
3149 		*ent_ts = next_ts;
3150 
3151 	if (missing_events)
3152 		*missing_events = next_lost;
3153 
3154 	return next;
3155 }
3156 
3157 /* Find the next real entry, without updating the iterator itself */
3158 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3159 					  int *ent_cpu, u64 *ent_ts)
3160 {
3161 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3162 }
3163 
3164 /* Find the next real entry, and increment the iterator to the next entry */
3165 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3166 {
3167 	iter->ent = __find_next_entry(iter, &iter->cpu,
3168 				      &iter->lost_events, &iter->ts);
3169 
3170 	if (iter->ent)
3171 		trace_iterator_increment(iter);
3172 
3173 	return iter->ent ? iter : NULL;
3174 }
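/*
 * Illustrative loop (editor's sketch): consumers of the iterator walk
 * the merged, timestamp-ordered stream one entry at a time, much like
 * s_next() below and the trace_pipe read path do:
 *
 *	while (trace_find_next_entry_inc(iter)) {
 *		ret = print_trace_line(iter);
 *		if (ret == TRACE_TYPE_PARTIAL_LINE)
 *			break;
 *	}
 */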
3175 
3176 static void trace_consume(struct trace_iterator *iter)
3177 {
3178 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3179 			    &iter->lost_events);
3180 }
3181 
3182 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3183 {
3184 	struct trace_iterator *iter = m->private;
3185 	int i = (int)*pos;
3186 	void *ent;
3187 
3188 	WARN_ON_ONCE(iter->leftover);
3189 
3190 	(*pos)++;
3191 
3192 	/* can't go backwards */
3193 	if (iter->idx > i)
3194 		return NULL;
3195 
3196 	if (iter->idx < 0)
3197 		ent = trace_find_next_entry_inc(iter);
3198 	else
3199 		ent = iter;
3200 
3201 	while (ent && iter->idx < i)
3202 		ent = trace_find_next_entry_inc(iter);
3203 
3204 	iter->pos = *pos;
3205 
3206 	return ent;
3207 }
3208 
3209 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3210 {
3211 	struct ring_buffer_event *event;
3212 	struct ring_buffer_iter *buf_iter;
3213 	unsigned long entries = 0;
3214 	u64 ts;
3215 
3216 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3217 
3218 	buf_iter = trace_buffer_iter(iter, cpu);
3219 	if (!buf_iter)
3220 		return;
3221 
3222 	ring_buffer_iter_reset(buf_iter);
3223 
3224 	/*
3225 	 * We could have the case with the max latency tracers
3226 	 * that a reset never took place on a cpu. This is evidenced
3227 	 * by the timestamp being before the start of the buffer.
3228 	 */
3229 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3230 		if (ts >= iter->trace_buffer->time_start)
3231 			break;
3232 		entries++;
3233 		ring_buffer_read(buf_iter, NULL);
3234 	}
3235 
3236 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3237 }
3238 
3239 /*
3240  * The current tracer is copied to avoid a global locking
3241  * all around.
3242  */
3243 static void *s_start(struct seq_file *m, loff_t *pos)
3244 {
3245 	struct trace_iterator *iter = m->private;
3246 	struct trace_array *tr = iter->tr;
3247 	int cpu_file = iter->cpu_file;
3248 	void *p = NULL;
3249 	loff_t l = 0;
3250 	int cpu;
3251 
3252 	/*
3253 	 * copy the tracer to avoid using a global lock all around.
3254 	 * iter->trace is a copy of current_trace, the pointer to the
3255 	 * name may be used instead of a strcmp(), as iter->trace->name
3256 	 * will point to the same string as current_trace->name.
3257 	 */
3258 	mutex_lock(&trace_types_lock);
3259 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3260 		*iter->trace = *tr->current_trace;
3261 	mutex_unlock(&trace_types_lock);
3262 
3263 #ifdef CONFIG_TRACER_MAX_TRACE
3264 	if (iter->snapshot && iter->trace->use_max_tr)
3265 		return ERR_PTR(-EBUSY);
3266 #endif
3267 
3268 	if (!iter->snapshot)
3269 		atomic_inc(&trace_record_taskinfo_disabled);
3270 
3271 	if (*pos != iter->pos) {
3272 		iter->ent = NULL;
3273 		iter->cpu = 0;
3274 		iter->idx = -1;
3275 
3276 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3277 			for_each_tracing_cpu(cpu)
3278 				tracing_iter_reset(iter, cpu);
3279 		} else
3280 			tracing_iter_reset(iter, cpu_file);
3281 
3282 		iter->leftover = 0;
3283 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3284 			;
3285 
3286 	} else {
3287 		/*
3288 		 * If we overflowed the seq_file before, then we want
3289 		 * to just reuse the trace_seq buffer again.
3290 		 */
3291 		if (iter->leftover)
3292 			p = iter;
3293 		else {
3294 			l = *pos - 1;
3295 			p = s_next(m, p, &l);
3296 		}
3297 	}
3298 
3299 	trace_event_read_lock();
3300 	trace_access_lock(cpu_file);
3301 	return p;
3302 }
3303 
3304 static void s_stop(struct seq_file *m, void *p)
3305 {
3306 	struct trace_iterator *iter = m->private;
3307 
3308 #ifdef CONFIG_TRACER_MAX_TRACE
3309 	if (iter->snapshot && iter->trace->use_max_tr)
3310 		return;
3311 #endif
3312 
3313 	if (!iter->snapshot)
3314 		atomic_dec(&trace_record_taskinfo_disabled);
3315 
3316 	trace_access_unlock(iter->cpu_file);
3317 	trace_event_read_unlock();
3318 }
3319 
3320 static void
3321 get_total_entries(struct trace_buffer *buf,
3322 		  unsigned long *total, unsigned long *entries)
3323 {
3324 	unsigned long count;
3325 	int cpu;
3326 
3327 	*total = 0;
3328 	*entries = 0;
3329 
3330 	for_each_tracing_cpu(cpu) {
3331 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
3332 		/*
3333 		 * If this buffer has skipped entries, then we hold all
3334 		 * entries for the trace and we need to ignore the
3335 		 * ones before the time stamp.
3336 		 */
3337 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3338 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3339 			/* total is the same as the entries */
3340 			*total += count;
3341 		} else
3342 			*total += count +
3343 				ring_buffer_overrun_cpu(buf->buffer, cpu);
3344 		*entries += count;
3345 	}
3346 }
3347 
3348 static void print_lat_help_header(struct seq_file *m)
3349 {
3350 	seq_puts(m, "#                  _------=> CPU#            \n"
3351 		    "#                 / _-----=> irqs-off        \n"
3352 		    "#                | / _----=> need-resched    \n"
3353 		    "#                || / _---=> hardirq/softirq \n"
3354 		    "#                ||| / _--=> preempt-depth   \n"
3355 		    "#                |||| /     delay            \n"
3356 		    "#  cmd     pid   ||||| time  |   caller      \n"
3357 		    "#     \\   /      |||||  \\    |   /         \n");
3358 }
3359 
3360 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3361 {
3362 	unsigned long total;
3363 	unsigned long entries;
3364 
3365 	get_total_entries(buf, &total, &entries);
3366 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3367 		   entries, total, num_online_cpus());
3368 	seq_puts(m, "#\n");
3369 }
3370 
3371 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3372 				   unsigned int flags)
3373 {
3374 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3375 
3376 	print_event_info(buf, m);
3377 
3378 	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3379 	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
3380 }
3381 
3382 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3383 				       unsigned int flags)
3384 {
3385 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3386 	const char tgid_space[] = "          ";
3387 	const char space[] = "  ";
3388 
3389 	print_event_info(buf, m);
3390 
3391 	seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3392 		   tgid ? tgid_space : space);
3393 	seq_printf(m, "#                          %s / _----=> need-resched\n",
3394 		   tgid ? tgid_space : space);
3395 	seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3396 		   tgid ? tgid_space : space);
3397 	seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3398 		   tgid ? tgid_space : space);
3399 	seq_printf(m, "#                          %s||| /     delay\n",
3400 		   tgid ? tgid_space : space);
3401 	seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
3402 		   tgid ? "   TGID   " : space);
3403 	seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
3404 		   tgid ? "     |    " : space);
3405 }
3406 
3407 void
3408 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3409 {
3410 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3411 	struct trace_buffer *buf = iter->trace_buffer;
3412 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3413 	struct tracer *type = iter->trace;
3414 	unsigned long entries;
3415 	unsigned long total;
3416 	const char *name = "preemption";
3417 
3418 	name = type->name;
3419 
3420 	get_total_entries(buf, &total, &entries);
3421 
3422 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3423 		   name, UTS_RELEASE);
3424 	seq_puts(m, "# -----------------------------------"
3425 		 "---------------------------------\n");
3426 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3427 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3428 		   nsecs_to_usecs(data->saved_latency),
3429 		   entries,
3430 		   total,
3431 		   buf->cpu,
3432 #if defined(CONFIG_PREEMPT_NONE)
3433 		   "server",
3434 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3435 		   "desktop",
3436 #elif defined(CONFIG_PREEMPT)
3437 		   "preempt",
3438 #else
3439 		   "unknown",
3440 #endif
3441 		   /* These are reserved for later use */
3442 		   0, 0, 0, 0);
3443 #ifdef CONFIG_SMP
3444 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3445 #else
3446 	seq_puts(m, ")\n");
3447 #endif
3448 	seq_puts(m, "#    -----------------\n");
3449 	seq_printf(m, "#    | task: %.16s-%d "
3450 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3451 		   data->comm, data->pid,
3452 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3453 		   data->policy, data->rt_priority);
3454 	seq_puts(m, "#    -----------------\n");
3455 
3456 	if (data->critical_start) {
3457 		seq_puts(m, "#  => started at: ");
3458 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3459 		trace_print_seq(m, &iter->seq);
3460 		seq_puts(m, "\n#  => ended at:   ");
3461 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3462 		trace_print_seq(m, &iter->seq);
3463 		seq_puts(m, "\n#\n");
3464 	}
3465 
3466 	seq_puts(m, "#\n");
3467 }
3468 
3469 static void test_cpu_buff_start(struct trace_iterator *iter)
3470 {
3471 	struct trace_seq *s = &iter->seq;
3472 	struct trace_array *tr = iter->tr;
3473 
3474 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3475 		return;
3476 
3477 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3478 		return;
3479 
3480 	if (cpumask_available(iter->started) &&
3481 	    cpumask_test_cpu(iter->cpu, iter->started))
3482 		return;
3483 
3484 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3485 		return;
3486 
3487 	if (cpumask_available(iter->started))
3488 		cpumask_set_cpu(iter->cpu, iter->started);
3489 
3490 	/* Don't print started cpu buffer for the first entry of the trace */
3491 	if (iter->idx > 1)
3492 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3493 				iter->cpu);
3494 }
3495 
3496 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3497 {
3498 	struct trace_array *tr = iter->tr;
3499 	struct trace_seq *s = &iter->seq;
3500 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3501 	struct trace_entry *entry;
3502 	struct trace_event *event;
3503 
3504 	entry = iter->ent;
3505 
3506 	test_cpu_buff_start(iter);
3507 
3508 	event = ftrace_find_event(entry->type);
3509 
3510 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3511 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3512 			trace_print_lat_context(iter);
3513 		else
3514 			trace_print_context(iter);
3515 	}
3516 
3517 	if (trace_seq_has_overflowed(s))
3518 		return TRACE_TYPE_PARTIAL_LINE;
3519 
3520 	if (event)
3521 		return event->funcs->trace(iter, sym_flags, event);
3522 
3523 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3524 
3525 	return trace_handle_return(s);
3526 }
3527 
3528 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3529 {
3530 	struct trace_array *tr = iter->tr;
3531 	struct trace_seq *s = &iter->seq;
3532 	struct trace_entry *entry;
3533 	struct trace_event *event;
3534 
3535 	entry = iter->ent;
3536 
3537 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3538 		trace_seq_printf(s, "%d %d %llu ",
3539 				 entry->pid, iter->cpu, iter->ts);
3540 
3541 	if (trace_seq_has_overflowed(s))
3542 		return TRACE_TYPE_PARTIAL_LINE;
3543 
3544 	event = ftrace_find_event(entry->type);
3545 	if (event)
3546 		return event->funcs->raw(iter, 0, event);
3547 
3548 	trace_seq_printf(s, "%d ?\n", entry->type);
3549 
3550 	return trace_handle_return(s);
3551 }
3552 
3553 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3554 {
3555 	struct trace_array *tr = iter->tr;
3556 	struct trace_seq *s = &iter->seq;
3557 	unsigned char newline = '\n';
3558 	struct trace_entry *entry;
3559 	struct trace_event *event;
3560 
3561 	entry = iter->ent;
3562 
3563 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3564 		SEQ_PUT_HEX_FIELD(s, entry->pid);
3565 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
3566 		SEQ_PUT_HEX_FIELD(s, iter->ts);
3567 		if (trace_seq_has_overflowed(s))
3568 			return TRACE_TYPE_PARTIAL_LINE;
3569 	}
3570 
3571 	event = ftrace_find_event(entry->type);
3572 	if (event) {
3573 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
3574 		if (ret != TRACE_TYPE_HANDLED)
3575 			return ret;
3576 	}
3577 
3578 	SEQ_PUT_FIELD(s, newline);
3579 
3580 	return trace_handle_return(s);
3581 }
3582 
3583 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3584 {
3585 	struct trace_array *tr = iter->tr;
3586 	struct trace_seq *s = &iter->seq;
3587 	struct trace_entry *entry;
3588 	struct trace_event *event;
3589 
3590 	entry = iter->ent;
3591 
3592 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3593 		SEQ_PUT_FIELD(s, entry->pid);
3594 		SEQ_PUT_FIELD(s, iter->cpu);
3595 		SEQ_PUT_FIELD(s, iter->ts);
3596 		if (trace_seq_has_overflowed(s))
3597 			return TRACE_TYPE_PARTIAL_LINE;
3598 	}
3599 
3600 	event = ftrace_find_event(entry->type);
3601 	return event ? event->funcs->binary(iter, 0, event) :
3602 		TRACE_TYPE_HANDLED;
3603 }
3604 
3605 int trace_empty(struct trace_iterator *iter)
3606 {
3607 	struct ring_buffer_iter *buf_iter;
3608 	int cpu;
3609 
3610 	/* If we are looking at one CPU buffer, only check that one */
3611 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3612 		cpu = iter->cpu_file;
3613 		buf_iter = trace_buffer_iter(iter, cpu);
3614 		if (buf_iter) {
3615 			if (!ring_buffer_iter_empty(buf_iter))
3616 				return 0;
3617 		} else {
3618 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3619 				return 0;
3620 		}
3621 		return 1;
3622 	}
3623 
3624 	for_each_tracing_cpu(cpu) {
3625 		buf_iter = trace_buffer_iter(iter, cpu);
3626 		if (buf_iter) {
3627 			if (!ring_buffer_iter_empty(buf_iter))
3628 				return 0;
3629 		} else {
3630 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3631 				return 0;
3632 		}
3633 	}
3634 
3635 	return 1;
3636 }
3637 
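/*
 * Pick the output format for the current entry: report lost events first,
 * give the tracer's own print_line() callback a chance, handle the
 * msg-only printk variants, then fall back to the binary, hex, raw or
 * default text format depending on the trace_options flags.
 */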
3638 /*  Called with trace_event_read_lock() held. */
3639 enum print_line_t print_trace_line(struct trace_iterator *iter)
3640 {
3641 	struct trace_array *tr = iter->tr;
3642 	unsigned long trace_flags = tr->trace_flags;
3643 	enum print_line_t ret;
3644 
3645 	if (iter->lost_events) {
3646 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3647 				 iter->cpu, iter->lost_events);
3648 		if (trace_seq_has_overflowed(&iter->seq))
3649 			return TRACE_TYPE_PARTIAL_LINE;
3650 	}
3651 
3652 	if (iter->trace && iter->trace->print_line) {
3653 		ret = iter->trace->print_line(iter);
3654 		if (ret != TRACE_TYPE_UNHANDLED)
3655 			return ret;
3656 	}
3657 
3658 	if (iter->ent->type == TRACE_BPUTS &&
3659 			trace_flags & TRACE_ITER_PRINTK &&
3660 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3661 		return trace_print_bputs_msg_only(iter);
3662 
3663 	if (iter->ent->type == TRACE_BPRINT &&
3664 			trace_flags & TRACE_ITER_PRINTK &&
3665 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3666 		return trace_print_bprintk_msg_only(iter);
3667 
3668 	if (iter->ent->type == TRACE_PRINT &&
3669 			trace_flags & TRACE_ITER_PRINTK &&
3670 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3671 		return trace_print_printk_msg_only(iter);
3672 
3673 	if (trace_flags & TRACE_ITER_BIN)
3674 		return print_bin_fmt(iter);
3675 
3676 	if (trace_flags & TRACE_ITER_HEX)
3677 		return print_hex_fmt(iter);
3678 
3679 	if (trace_flags & TRACE_ITER_RAW)
3680 		return print_raw_fmt(iter);
3681 
3682 	return print_trace_fmt(iter);
3683 }
3684 
3685 void trace_latency_header(struct seq_file *m)
3686 {
3687 	struct trace_iterator *iter = m->private;
3688 	struct trace_array *tr = iter->tr;
3689 
3690 	/* print nothing if the buffers are empty */
3691 	if (trace_empty(iter))
3692 		return;
3693 
3694 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3695 		print_trace_header(m, iter);
3696 
3697 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3698 		print_lat_help_header(m);
3699 }
3700 
3701 void trace_default_header(struct seq_file *m)
3702 {
3703 	struct trace_iterator *iter = m->private;
3704 	struct trace_array *tr = iter->tr;
3705 	unsigned long trace_flags = tr->trace_flags;
3706 
3707 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3708 		return;
3709 
3710 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3711 		/* print nothing if the buffers are empty */
3712 		if (trace_empty(iter))
3713 			return;
3714 		print_trace_header(m, iter);
3715 		if (!(trace_flags & TRACE_ITER_VERBOSE))
3716 			print_lat_help_header(m);
3717 	} else {
3718 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3719 			if (trace_flags & TRACE_ITER_IRQ_INFO)
3720 				print_func_help_header_irq(iter->trace_buffer,
3721 							   m, trace_flags);
3722 			else
3723 				print_func_help_header(iter->trace_buffer, m,
3724 						       trace_flags);
3725 		}
3726 	}
3727 }
3728 
3729 static void test_ftrace_alive(struct seq_file *m)
3730 {
3731 	if (!ftrace_is_dead())
3732 		return;
3733 	seq_puts(m, "# WARNING: FUNCTION TRACING IS DISABLED\n"
3734 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
3735 }
3736 
3737 #ifdef CONFIG_TRACER_MAX_TRACE
3738 static void show_snapshot_main_help(struct seq_file *m)
3739 {
3740 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3741 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3742 		    "#                      Takes a snapshot of the main buffer.\n"
3743 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3744 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3745 		    "#                       is not a '0' or '1')\n");
3746 }
3747 
3748 static void show_snapshot_percpu_help(struct seq_file *m)
3749 {
3750 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3751 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3752 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3753 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3754 #else
3755 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3756 		    "#                     Must use main snapshot file to allocate.\n");
3757 #endif
3758 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3759 		    "#                      (Doesn't have to be '2'; works with any number that\n"
3760 		    "#                       is not a '0' or '1')\n");
3761 }
3762 
3763 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3764 {
3765 	if (iter->tr->allocated_snapshot)
3766 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3767 	else
3768 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3769 
3770 	seq_puts(m, "# Snapshot commands:\n");
3771 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3772 		show_snapshot_main_help(m);
3773 	else
3774 		show_snapshot_percpu_help(m);
3775 }
3776 #else
3777 /* Should never be called */
3778 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3779 #endif
3780 
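/*
 * seq_file ->show() for the trace file: emit the header block for a NULL
 * entry, flush any output left over from a previous seq_file overflow,
 * or format the current entry and remember whether it fit.
 */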
3781 static int s_show(struct seq_file *m, void *v)
3782 {
3783 	struct trace_iterator *iter = v;
3784 	int ret;
3785 
3786 	if (iter->ent == NULL) {
3787 		if (iter->tr) {
3788 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3789 			seq_puts(m, "#\n");
3790 			test_ftrace_alive(m);
3791 		}
3792 		if (iter->snapshot && trace_empty(iter))
3793 			print_snapshot_help(m, iter);
3794 		else if (iter->trace && iter->trace->print_header)
3795 			iter->trace->print_header(m);
3796 		else
3797 			trace_default_header(m);
3798 
3799 	} else if (iter->leftover) {
3800 		/*
3801 		 * If we filled the seq_file buffer earlier, we
3802 		 * want to just show it now.
3803 		 */
3804 		ret = trace_print_seq(m, &iter->seq);
3805 
3806 		/* ret should this time be zero, but you never know */
3807 		iter->leftover = ret;
3808 
3809 	} else {
3810 		print_trace_line(iter);
3811 		ret = trace_print_seq(m, &iter->seq);
3812 		/*
3813 		 * If we overflow the seq_file buffer, then it will
3814 		 * ask us for this data again at start up.
3815 		 * Use that instead.
3816 		 *  ret is 0 if seq_file write succeeded.
3817 		 *        -1 otherwise.
3818 		 */
3819 		iter->leftover = ret;
3820 	}
3821 
3822 	return 0;
3823 }
3824 
3825 /*
3826  * Should be used after trace_array_get(), trace_types_lock
3827  * ensures that i_cdev was already initialized.
3828  */
3829 static inline int tracing_get_cpu(struct inode *inode)
3830 {
3831 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3832 		return (long)inode->i_cdev - 1;
3833 	return RING_BUFFER_ALL_CPUS;
3834 }
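/*
 * Illustration (assuming trace_create_cpu_file() stored cpu + 1 in
 * i_cdev): a per_cpu/cpu2/trace file yields 3 - 1 = 2, while the
 * top-level trace file leaves i_cdev NULL and so maps to
 * RING_BUFFER_ALL_CPUS.
 */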
3835 
3836 static const struct seq_operations tracer_seq_ops = {
3837 	.start		= s_start,
3838 	.next		= s_next,
3839 	.stop		= s_stop,
3840 	.show		= s_show,
3841 };
3842 
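/*
 * Set up a trace_iterator for reading the "trace" (or "snapshot") file:
 * take a private copy of the current tracer, point the iterator at the
 * right buffer, stop tracing for a plain read, and prepare a ring buffer
 * iterator for each CPU being dumped.
 */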
3843 static struct trace_iterator *
3844 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3845 {
3846 	struct trace_array *tr = inode->i_private;
3847 	struct trace_iterator *iter;
3848 	int cpu;
3849 
3850 	if (tracing_disabled)
3851 		return ERR_PTR(-ENODEV);
3852 
3853 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3854 	if (!iter)
3855 		return ERR_PTR(-ENOMEM);
3856 
3857 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3858 				    GFP_KERNEL);
3859 	if (!iter->buffer_iter)
3860 		goto release;
3861 
3862 	/*
3863 	 * We make a copy of the current tracer to avoid concurrent
3864 	 * changes on it while we are reading.
3865 	 */
3866 	mutex_lock(&trace_types_lock);
3867 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3868 	if (!iter->trace)
3869 		goto fail;
3870 
3871 	*iter->trace = *tr->current_trace;
3872 
3873 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3874 		goto fail;
3875 
3876 	iter->tr = tr;
3877 
3878 #ifdef CONFIG_TRACER_MAX_TRACE
3879 	/* Currently only the top directory has a snapshot */
3880 	if (tr->current_trace->print_max || snapshot)
3881 		iter->trace_buffer = &tr->max_buffer;
3882 	else
3883 #endif
3884 		iter->trace_buffer = &tr->trace_buffer;
3885 	iter->snapshot = snapshot;
3886 	iter->pos = -1;
3887 	iter->cpu_file = tracing_get_cpu(inode);
3888 	mutex_init(&iter->mutex);
3889 
3890 	/* Notify the tracer early; before we stop tracing. */
3891 	if (iter->trace && iter->trace->open)
3892 		iter->trace->open(iter);
3893 
3894 	/* Annotate start of buffers if we had overruns */
3895 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3896 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3897 
3898 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3899 	if (trace_clocks[tr->clock_id].in_ns)
3900 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3901 
3902 	/* stop the trace while dumping if we are not opening "snapshot" */
3903 	if (!iter->snapshot)
3904 		tracing_stop_tr(tr);
3905 
3906 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3907 		for_each_tracing_cpu(cpu) {
3908 			iter->buffer_iter[cpu] =
3909 				ring_buffer_read_prepare(iter->trace_buffer->buffer,
3910 							 cpu, GFP_KERNEL);
3911 		}
3912 		ring_buffer_read_prepare_sync();
3913 		for_each_tracing_cpu(cpu) {
3914 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3915 			tracing_iter_reset(iter, cpu);
3916 		}
3917 	} else {
3918 		cpu = iter->cpu_file;
3919 		iter->buffer_iter[cpu] =
3920 			ring_buffer_read_prepare(iter->trace_buffer->buffer,
3921 						 cpu, GFP_KERNEL);
3922 		ring_buffer_read_prepare_sync();
3923 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3924 		tracing_iter_reset(iter, cpu);
3925 	}
3926 
3927 	mutex_unlock(&trace_types_lock);
3928 
3929 	return iter;
3930 
3931  fail:
3932 	mutex_unlock(&trace_types_lock);
3933 	kfree(iter->trace);
3934 	kfree(iter->buffer_iter);
3935 release:
3936 	seq_release_private(inode, file);
3937 	return ERR_PTR(-ENOMEM);
3938 }
3939 
3940 int tracing_open_generic(struct inode *inode, struct file *filp)
3941 {
3942 	if (tracing_disabled)
3943 		return -ENODEV;
3944 
3945 	filp->private_data = inode->i_private;
3946 	return 0;
3947 }
3948 
3949 bool tracing_is_disabled(void)
3950 {
3951 	return tracing_disabled ? true : false;
3952 }
3953 
3954 /*
3955  * Open and update trace_array ref count.
3956  * Must have the current trace_array passed to it.
3957  */
3958 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3959 {
3960 	struct trace_array *tr = inode->i_private;
3961 
3962 	if (tracing_disabled)
3963 		return -ENODEV;
3964 
3965 	if (trace_array_get(tr) < 0)
3966 		return -ENODEV;
3967 
3968 	filp->private_data = inode->i_private;
3969 
3970 	return 0;
3971 }
3972 
3973 static int tracing_release(struct inode *inode, struct file *file)
3974 {
3975 	struct trace_array *tr = inode->i_private;
3976 	struct seq_file *m = file->private_data;
3977 	struct trace_iterator *iter;
3978 	int cpu;
3979 
3980 	if (!(file->f_mode & FMODE_READ)) {
3981 		trace_array_put(tr);
3982 		return 0;
3983 	}
3984 
3985 	/* Writes do not use seq_file */
3986 	iter = m->private;
3987 	mutex_lock(&trace_types_lock);
3988 
3989 	for_each_tracing_cpu(cpu) {
3990 		if (iter->buffer_iter[cpu])
3991 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3992 	}
3993 
3994 	if (iter->trace && iter->trace->close)
3995 		iter->trace->close(iter);
3996 
3997 	if (!iter->snapshot)
3998 		/* reenable tracing if it was previously enabled */
3999 		tracing_start_tr(tr);
4000 
4001 	__trace_array_put(tr);
4002 
4003 	mutex_unlock(&trace_types_lock);
4004 
4005 	mutex_destroy(&iter->mutex);
4006 	free_cpumask_var(iter->started);
4007 	kfree(iter->trace);
4008 	kfree(iter->buffer_iter);
4009 	seq_release_private(inode, file);
4010 
4011 	return 0;
4012 }
4013 
4014 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4015 {
4016 	struct trace_array *tr = inode->i_private;
4017 
4018 	trace_array_put(tr);
4019 	return 0;
4020 }
4021 
4022 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4023 {
4024 	struct trace_array *tr = inode->i_private;
4025 
4026 	trace_array_put(tr);
4027 
4028 	return single_release(inode, file);
4029 }
4030 
4031 static int tracing_open(struct inode *inode, struct file *file)
4032 {
4033 	struct trace_array *tr = inode->i_private;
4034 	struct trace_iterator *iter;
4035 	int ret = 0;
4036 
4037 	if (trace_array_get(tr) < 0)
4038 		return -ENODEV;
4039 
4040 	/* If this file was open for write, then erase contents */
4041 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4042 		int cpu = tracing_get_cpu(inode);
4043 		struct trace_buffer *trace_buf = &tr->trace_buffer;
4044 
4045 #ifdef CONFIG_TRACER_MAX_TRACE
4046 		if (tr->current_trace->print_max)
4047 			trace_buf = &tr->max_buffer;
4048 #endif
4049 
4050 		if (cpu == RING_BUFFER_ALL_CPUS)
4051 			tracing_reset_online_cpus(trace_buf);
4052 		else
4053 			tracing_reset(trace_buf, cpu);
4054 	}
4055 
4056 	if (file->f_mode & FMODE_READ) {
4057 		iter = __tracing_open(inode, file, false);
4058 		if (IS_ERR(iter))
4059 			ret = PTR_ERR(iter);
4060 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4061 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4062 	}
4063 
4064 	if (ret < 0)
4065 		trace_array_put(tr);
4066 
4067 	return ret;
4068 }
4069 
4070 /*
4071  * Some tracers are not suitable for instance buffers.
4072  * A tracer is always available for the global array (toplevel)
4073  * or if it explicitly states that it is.
4074  */
4075 static bool
4076 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4077 {
4078 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4079 }
4080 
4081 /* Find the next tracer that this trace array may use */
4082 static struct tracer *
4083 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4084 {
4085 	while (t && !trace_ok_for_array(t, tr))
4086 		t = t->next;
4087 
4088 	return t;
4089 }
4090 
4091 static void *
4092 t_next(struct seq_file *m, void *v, loff_t *pos)
4093 {
4094 	struct trace_array *tr = m->private;
4095 	struct tracer *t = v;
4096 
4097 	(*pos)++;
4098 
4099 	if (t)
4100 		t = get_tracer_for_array(tr, t->next);
4101 
4102 	return t;
4103 }
4104 
4105 static void *t_start(struct seq_file *m, loff_t *pos)
4106 {
4107 	struct trace_array *tr = m->private;
4108 	struct tracer *t;
4109 	loff_t l = 0;
4110 
4111 	mutex_lock(&trace_types_lock);
4112 
4113 	t = get_tracer_for_array(tr, trace_types);
4114 	for (; t && l < *pos; t = t_next(m, t, &l))
4115 			;
4116 
4117 	return t;
4118 }
4119 
4120 static void t_stop(struct seq_file *m, void *p)
4121 {
4122 	mutex_unlock(&trace_types_lock);
4123 }
4124 
4125 static int t_show(struct seq_file *m, void *v)
4126 {
4127 	struct tracer *t = v;
4128 
4129 	if (!t)
4130 		return 0;
4131 
4132 	seq_puts(m, t->name);
4133 	if (t->next)
4134 		seq_putc(m, ' ');
4135 	else
4136 		seq_putc(m, '\n');
4137 
4138 	return 0;
4139 }
4140 
4141 static const struct seq_operations show_traces_seq_ops = {
4142 	.start		= t_start,
4143 	.next		= t_next,
4144 	.stop		= t_stop,
4145 	.show		= t_show,
4146 };
4147 
4148 static int show_traces_open(struct inode *inode, struct file *file)
4149 {
4150 	struct trace_array *tr = inode->i_private;
4151 	struct seq_file *m;
4152 	int ret;
4153 
4154 	if (tracing_disabled)
4155 		return -ENODEV;
4156 
4157 	if (trace_array_get(tr) < 0)
4158 		return -ENODEV;
4159 
4160 	ret = seq_open(file, &show_traces_seq_ops);
4161 	if (ret) {
4162 		trace_array_put(tr);
4163 		return ret;
4164 	}
4165 
4166 	m = file->private_data;
4167 	m->private = tr;
4168 
4169 	return 0;
4170 }
4171 
4172 static int show_traces_release(struct inode *inode, struct file *file)
4173 {
4174 	struct trace_array *tr = inode->i_private;
4175 
4176 	trace_array_put(tr);
4177 	return seq_release(inode, file);
4178 }
4179 
4180 static ssize_t
4181 tracing_write_stub(struct file *filp, const char __user *ubuf,
4182 		   size_t count, loff_t *ppos)
4183 {
4184 	return count;
4185 }
4186 
4187 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4188 {
4189 	int ret;
4190 
4191 	if (file->f_mode & FMODE_READ)
4192 		ret = seq_lseek(file, offset, whence);
4193 	else
4194 		file->f_pos = ret = 0;
4195 
4196 	return ret;
4197 }
4198 
4199 static const struct file_operations tracing_fops = {
4200 	.open		= tracing_open,
4201 	.read		= seq_read,
4202 	.write		= tracing_write_stub,
4203 	.llseek		= tracing_lseek,
4204 	.release	= tracing_release,
4205 };
4206 
4207 static const struct file_operations show_traces_fops = {
4208 	.open		= show_traces_open,
4209 	.read		= seq_read,
4210 	.llseek		= seq_lseek,
4211 	.release	= show_traces_release,
4212 };
4213 
4214 static ssize_t
4215 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4216 		     size_t count, loff_t *ppos)
4217 {
4218 	struct trace_array *tr = file_inode(filp)->i_private;
4219 	char *mask_str;
4220 	int len;
4221 
4222 	len = snprintf(NULL, 0, "%*pb\n",
4223 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4224 	mask_str = kmalloc(len, GFP_KERNEL);
4225 	if (!mask_str)
4226 		return -ENOMEM;
4227 
4228 	len = snprintf(mask_str, len, "%*pb\n",
4229 		       cpumask_pr_args(tr->tracing_cpumask));
4230 	if (len >= count) {
4231 		count = -EINVAL;
4232 		goto out_err;
4233 	}
4234 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4235 
4236 out_err:
4237 	kfree(mask_str);
4238 
4239 	return count;
4240 }
4241 
4242 static ssize_t
4243 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4244 		      size_t count, loff_t *ppos)
4245 {
4246 	struct trace_array *tr = file_inode(filp)->i_private;
4247 	cpumask_var_t tracing_cpumask_new;
4248 	int err, cpu;
4249 
4250 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4251 		return -ENOMEM;
4252 
4253 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4254 	if (err)
4255 		goto err_unlock;
4256 
4257 	local_irq_disable();
4258 	arch_spin_lock(&tr->max_lock);
4259 	for_each_tracing_cpu(cpu) {
4260 		/*
4261 		 * Increase/decrease the disabled counter if we are
4262 		 * about to flip a bit in the cpumask:
4263 		 */
4264 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4265 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4266 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4267 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4268 		}
4269 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4270 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4271 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4272 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4273 		}
4274 	}
4275 	arch_spin_unlock(&tr->max_lock);
4276 	local_irq_enable();
4277 
4278 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4279 	free_cpumask_var(tracing_cpumask_new);
4280 
4281 	return count;
4282 
4283 err_unlock:
4284 	free_cpumask_var(tracing_cpumask_new);
4285 
4286 	return err;
4287 }
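/*
 * Example: writing a hex mask such as "0f" to tracing_cpumask keeps only
 * CPUs 0-3 recording; CPUs cleared from the mask get their per-CPU
 * disabled count bumped and ring buffer recording switched off by the
 * loop in tracing_cpumask_write() above.
 */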
4288 
4289 static const struct file_operations tracing_cpumask_fops = {
4290 	.open		= tracing_open_generic_tr,
4291 	.read		= tracing_cpumask_read,
4292 	.write		= tracing_cpumask_write,
4293 	.release	= tracing_release_generic_tr,
4294 	.llseek		= generic_file_llseek,
4295 };
4296 
4297 static int tracing_trace_options_show(struct seq_file *m, void *v)
4298 {
4299 	struct tracer_opt *trace_opts;
4300 	struct trace_array *tr = m->private;
4301 	u32 tracer_flags;
4302 	int i;
4303 
4304 	mutex_lock(&trace_types_lock);
4305 	tracer_flags = tr->current_trace->flags->val;
4306 	trace_opts = tr->current_trace->flags->opts;
4307 
4308 	for (i = 0; trace_options[i]; i++) {
4309 		if (tr->trace_flags & (1 << i))
4310 			seq_printf(m, "%s\n", trace_options[i]);
4311 		else
4312 			seq_printf(m, "no%s\n", trace_options[i]);
4313 	}
4314 
4315 	for (i = 0; trace_opts[i].name; i++) {
4316 		if (tracer_flags & trace_opts[i].bit)
4317 			seq_printf(m, "%s\n", trace_opts[i].name);
4318 		else
4319 			seq_printf(m, "no%s\n", trace_opts[i].name);
4320 	}
4321 	mutex_unlock(&trace_types_lock);
4322 
4323 	return 0;
4324 }
4325 
4326 static int __set_tracer_option(struct trace_array *tr,
4327 			       struct tracer_flags *tracer_flags,
4328 			       struct tracer_opt *opts, int neg)
4329 {
4330 	struct tracer *trace = tracer_flags->trace;
4331 	int ret;
4332 
4333 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4334 	if (ret)
4335 		return ret;
4336 
4337 	if (neg)
4338 		tracer_flags->val &= ~opts->bit;
4339 	else
4340 		tracer_flags->val |= opts->bit;
4341 	return 0;
4342 }
4343 
4344 /* Try to assign a tracer specific option */
4345 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4346 {
4347 	struct tracer *trace = tr->current_trace;
4348 	struct tracer_flags *tracer_flags = trace->flags;
4349 	struct tracer_opt *opts = NULL;
4350 	int i;
4351 
4352 	for (i = 0; tracer_flags->opts[i].name; i++) {
4353 		opts = &tracer_flags->opts[i];
4354 
4355 		if (strcmp(cmp, opts->name) == 0)
4356 			return __set_tracer_option(tr, trace->flags, opts, neg);
4357 	}
4358 
4359 	return -EINVAL;
4360 }
4361 
4362 /* Some tracers require overwrite to stay enabled */
4363 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4364 {
4365 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4366 		return -1;
4367 
4368 	return 0;
4369 }
4370 
4371 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4372 {
4373 	if ((mask == TRACE_ITER_RECORD_TGID) ||
4374 	    (mask == TRACE_ITER_RECORD_CMD))
4375 		lockdep_assert_held(&event_mutex);
4376 
4377 	/* do nothing if flag is already set */
4378 	if (!!(tr->trace_flags & mask) == !!enabled)
4379 		return 0;
4380 
4381 	/* Give the tracer a chance to approve the change */
4382 	if (tr->current_trace->flag_changed)
4383 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4384 			return -EINVAL;
4385 
4386 	if (enabled)
4387 		tr->trace_flags |= mask;
4388 	else
4389 		tr->trace_flags &= ~mask;
4390 
4391 	if (mask == TRACE_ITER_RECORD_CMD)
4392 		trace_event_enable_cmd_record(enabled);
4393 
4394 	if (mask == TRACE_ITER_RECORD_TGID) {
4395 		if (!tgid_map)
4396 			tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
4397 					   GFP_KERNEL);
4398 		if (!tgid_map) {
4399 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4400 			return -ENOMEM;
4401 		}
4402 
4403 		trace_event_enable_tgid_record(enabled);
4404 	}
4405 
4406 	if (mask == TRACE_ITER_EVENT_FORK)
4407 		trace_event_follow_fork(tr, enabled);
4408 
4409 	if (mask == TRACE_ITER_FUNC_FORK)
4410 		ftrace_pid_follow_fork(tr, enabled);
4411 
4412 	if (mask == TRACE_ITER_OVERWRITE) {
4413 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4414 #ifdef CONFIG_TRACER_MAX_TRACE
4415 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4416 #endif
4417 	}
4418 
4419 	if (mask == TRACE_ITER_PRINTK) {
4420 		trace_printk_start_stop_comm(enabled);
4421 		trace_printk_control(enabled);
4422 	}
4423 
4424 	return 0;
4425 }
4426 
4427 static int trace_set_options(struct trace_array *tr, char *option)
4428 {
4429 	char *cmp;
4430 	int neg = 0;
4431 	int ret = -ENODEV;
4432 	int i;
4433 	size_t orig_len = strlen(option);
4434 
4435 	cmp = strstrip(option);
4436 
4437 	if (strncmp(cmp, "no", 2) == 0) {
4438 		neg = 1;
4439 		cmp += 2;
4440 	}
4441 
4442 	mutex_lock(&event_mutex);
4443 	mutex_lock(&trace_types_lock);
4444 
4445 	for (i = 0; trace_options[i]; i++) {
4446 		if (strcmp(cmp, trace_options[i]) == 0) {
4447 			ret = set_tracer_flag(tr, 1 << i, !neg);
4448 			break;
4449 		}
4450 	}
4451 
4452 	/* If no option could be set, test the specific tracer options */
4453 	if (!trace_options[i])
4454 		ret = set_tracer_option(tr, cmp, neg);
4455 
4456 	mutex_unlock(&trace_types_lock);
4457 	mutex_unlock(&event_mutex);
4458 
4459 	/*
4460 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4461 	 * turn it back into a space.
4462 	 */
4463 	if (orig_len > strlen(option))
4464 		option[strlen(option)] = ' ';
4465 
4466 	return ret;
4467 }
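/*
 * E.g. writing "noraw" strips the "no" prefix, matches the "raw" entry in
 * trace_options[] and clears TRACE_ITER_RAW; a name with no core match
 * falls through to set_tracer_option() so tracer-specific flags work too.
 */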
4468 
4469 static void __init apply_trace_boot_options(void)
4470 {
4471 	char *buf = trace_boot_options_buf;
4472 	char *option;
4473 
4474 	while (true) {
4475 		option = strsep(&buf, ",");
4476 
4477 		if (!option)
4478 			break;
4479 
4480 		if (*option)
4481 			trace_set_options(&global_trace, option);
4482 
4483 		/* Put back the comma to allow this to be called again */
4484 		if (buf)
4485 			*(buf - 1) = ',';
4486 	}
4487 }
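/*
 * E.g. a boot parameter such as trace_options=sym-offset,noirq-info is
 * split on commas and each token is fed to trace_set_options(); the
 * comma that strsep() replaced with '\0' is then put back so the buffer
 * can be parsed again later.
 */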
4488 
4489 static ssize_t
4490 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4491 			size_t cnt, loff_t *ppos)
4492 {
4493 	struct seq_file *m = filp->private_data;
4494 	struct trace_array *tr = m->private;
4495 	char buf[64];
4496 	int ret;
4497 
4498 	if (cnt >= sizeof(buf))
4499 		return -EINVAL;
4500 
4501 	if (copy_from_user(buf, ubuf, cnt))
4502 		return -EFAULT;
4503 
4504 	buf[cnt] = 0;
4505 
4506 	ret = trace_set_options(tr, buf);
4507 	if (ret < 0)
4508 		return ret;
4509 
4510 	*ppos += cnt;
4511 
4512 	return cnt;
4513 }
4514 
4515 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4516 {
4517 	struct trace_array *tr = inode->i_private;
4518 	int ret;
4519 
4520 	if (tracing_disabled)
4521 		return -ENODEV;
4522 
4523 	if (trace_array_get(tr) < 0)
4524 		return -ENODEV;
4525 
4526 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
4527 	if (ret < 0)
4528 		trace_array_put(tr);
4529 
4530 	return ret;
4531 }
4532 
4533 static const struct file_operations tracing_iter_fops = {
4534 	.open		= tracing_trace_options_open,
4535 	.read		= seq_read,
4536 	.llseek		= seq_lseek,
4537 	.release	= tracing_single_release_tr,
4538 	.write		= tracing_trace_options_write,
4539 };
4540 
4541 static const char readme_msg[] =
4542 	"tracing mini-HOWTO:\n\n"
4543 	"# echo 0 > tracing_on : quick way to disable tracing\n"
4544 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4545 	" Important files:\n"
4546 	"  trace\t\t\t- The static contents of the buffer\n"
4547 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
4548 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4549 	"  current_tracer\t- function and latency tracers\n"
4550 	"  available_tracers\t- list of configured tracers for current_tracer\n"
4551 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4552 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4553 	"  trace_clock\t\t-change the clock used to order events\n"
4554 	"       local:   Per cpu clock but may not be synced across CPUs\n"
4555 	"      global:   Synced across CPUs but slows tracing down.\n"
4556 	"     counter:   Not a clock, but just an increment\n"
4557 	"      uptime:   Jiffy counter from time of boot\n"
4558 	"        perf:   Same clock that perf events use\n"
4559 #ifdef CONFIG_X86_64
4560 	"     x86-tsc:   TSC cycle counter\n"
4561 #endif
4562 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4563 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4564 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
4565 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4566 	"\t\t\t  Remove sub-buffer with rmdir\n"
4567 	"  trace_options\t\t- Set format or modify how tracing happens\n"
4568 	"\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4569 	"\t\t\t  option name\n"
4570 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4571 #ifdef CONFIG_DYNAMIC_FTRACE
4572 	"\n  available_filter_functions - list of functions that can be filtered on\n"
4573 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
4574 	"\t\t\t  functions\n"
4575 	"\t     accepts: func_full_name or glob-matching-pattern\n"
4576 	"\t     modules: Can select a group via module\n"
4577 	"\t      Format: :mod:<module-name>\n"
4578 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4579 	"\t    triggers: a command to perform when function is hit\n"
4580 	"\t      Format: <function>:<trigger>[:count]\n"
4581 	"\t     trigger: traceon, traceoff\n"
4582 	"\t\t      enable_event:<system>:<event>\n"
4583 	"\t\t      disable_event:<system>:<event>\n"
4584 #ifdef CONFIG_STACKTRACE
4585 	"\t\t      stacktrace\n"
4586 #endif
4587 #ifdef CONFIG_TRACER_SNAPSHOT
4588 	"\t\t      snapshot\n"
4589 #endif
4590 	"\t\t      dump\n"
4591 	"\t\t      cpudump\n"
4592 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4593 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4594 	"\t     The first one will disable tracing every time do_fault is hit\n"
4595 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4596 	"\t       The first time do trap is hit and it disables tracing, the\n"
4597 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
4598 	"\t       the counter will not decrement. It only decrements when the\n"
4599 	"\t       trigger did work\n"
4600 	"\t     To remove trigger without count:\n"
4601 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4602 	"\t     To remove trigger with a count:\n"
4603 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4604 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4605 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4606 	"\t    modules: Can select a group via module command :mod:\n"
4607 	"\t    Does not accept triggers\n"
4608 #endif /* CONFIG_DYNAMIC_FTRACE */
4609 #ifdef CONFIG_FUNCTION_TRACER
4610 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4611 	"\t\t    (function)\n"
4612 #endif
4613 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4614 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4615 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4616 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4617 #endif
4618 #ifdef CONFIG_TRACER_SNAPSHOT
4619 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4620 	"\t\t\t  snapshot buffer. Read the contents for more\n"
4621 	"\t\t\t  information\n"
4622 #endif
4623 #ifdef CONFIG_STACK_TRACER
4624 	"  stack_trace\t\t- Shows the max stack trace when active\n"
4625 	"  stack_max_size\t- Shows current max stack size that was traced\n"
4626 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
4627 	"\t\t\t  new trace)\n"
4628 #ifdef CONFIG_DYNAMIC_FTRACE
4629 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4630 	"\t\t\t  traces\n"
4631 #endif
4632 #endif /* CONFIG_STACK_TRACER */
4633 #ifdef CONFIG_KPROBE_EVENTS
4634 	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4635 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4636 #endif
4637 #ifdef CONFIG_UPROBE_EVENTS
4638 	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4639 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
4640 #endif
4641 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4642 	"\t  accepts: event-definitions (one definition per line)\n"
4643 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4644 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4645 	"\t           -:[<group>/]<event>\n"
4646 #ifdef CONFIG_KPROBE_EVENTS
4647 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4648   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4649 #endif
4650 #ifdef CONFIG_UPROBE_EVENTS
4651 	"\t    place: <path>:<offset>\n"
4652 #endif
4653 	"\t     args: <name>=fetcharg[:type]\n"
4654 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4655 	"\t           $stack<index>, $stack, $retval, $comm\n"
4656 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4657 	"\t           b<bit-width>@<bit-offset>/<container-size>\n"
4658 #endif
4659 	"  events/\t\t- Directory containing all trace event subsystems:\n"
4660 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4661 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
4662 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4663 	"\t\t\t  events\n"
4664 	"      filter\t\t- If set, only events passing filter are traced\n"
4665 	"  events/<system>/<event>/\t- Directory containing control files for\n"
4666 	"\t\t\t  <event>:\n"
4667 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4668 	"      filter\t\t- If set, only events passing filter are traced\n"
4669 	"      trigger\t\t- If set, a command to perform when event is hit\n"
4670 	"\t    Format: <trigger>[:count][if <filter>]\n"
4671 	"\t   trigger: traceon, traceoff\n"
4672 	"\t            enable_event:<system>:<event>\n"
4673 	"\t            disable_event:<system>:<event>\n"
4674 #ifdef CONFIG_HIST_TRIGGERS
4675 	"\t            enable_hist:<system>:<event>\n"
4676 	"\t            disable_hist:<system>:<event>\n"
4677 #endif
4678 #ifdef CONFIG_STACKTRACE
4679 	"\t\t    stacktrace\n"
4680 #endif
4681 #ifdef CONFIG_TRACER_SNAPSHOT
4682 	"\t\t    snapshot\n"
4683 #endif
4684 #ifdef CONFIG_HIST_TRIGGERS
4685 	"\t\t    hist (see below)\n"
4686 #endif
4687 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4688 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4689 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4690 	"\t                  events/block/block_unplug/trigger\n"
4691 	"\t   The first disables tracing every time block_unplug is hit.\n"
4692 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4693 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4694 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4695 	"\t   Like function triggers, the counter is only decremented if it\n"
4696 	"\t    enabled or disabled tracing.\n"
4697 	"\t   To remove a trigger without a count:\n"
4698 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
4699 	"\t   To remove a trigger with a count:\n"
4700 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4701 	"\t   Filters can be ignored when removing a trigger.\n"
4702 #ifdef CONFIG_HIST_TRIGGERS
4703 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4704 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
4705 	"\t            [:values=<field1[,field2,...]>]\n"
4706 	"\t            [:sort=<field1[,field2,...]>]\n"
4707 	"\t            [:size=#entries]\n"
4708 	"\t            [:pause][:continue][:clear]\n"
4709 	"\t            [:name=histname1]\n"
4710 	"\t            [if <filter>]\n\n"
4711 	"\t    When a matching event is hit, an entry is added to a hash\n"
4712 	"\t    table using the key(s) and value(s) named, and the value of a\n"
4713 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
4714 	"\t    correspond to fields in the event's format description.  Keys\n"
4715 	"\t    can be any field, or the special string 'stacktrace'.\n"
4716 	"\t    Compound keys consisting of up to two fields can be specified\n"
4717 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4718 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
4719 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
4720 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
4721 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
4722 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
4723 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
4724 	"\t    its histogram data will be shared with other triggers of the\n"
4725 	"\t    same name, and trigger hits will update this common data.\n\n"
4726 	"\t    Reading the 'hist' file for the event will dump the hash\n"
4727 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
4728 	"\t    triggers attached to an event, there will be a table for each\n"
4729 	"\t    trigger in the output.  The table displayed for a named\n"
4730 	"\t    trigger will be the same as any other instance having the\n"
4731 	"\t    same name.  The default format used to display a given field\n"
4732 	"\t    can be modified by appending any of the following modifiers\n"
4733 	"\t    to the field name, as applicable:\n\n"
4734 	"\t            .hex        display a number as a hex value\n"
4735 	"\t            .sym        display an address as a symbol\n"
4736 	"\t            .sym-offset display an address as a symbol and offset\n"
4737 	"\t            .execname   display a common_pid as a program name\n"
4738 	"\t            .syscall    display a syscall id as a syscall name\n\n"
4739 	"\t            .log2       display log2 value rather than raw number\n\n"
4740 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
4741 	"\t    trigger or to start a hist trigger but not log any events\n"
4742 	"\t    until told to do so.  'continue' can be used to start or\n"
4743 	"\t    restart a paused hist trigger.\n\n"
4744 	"\t    The 'clear' parameter will clear the contents of a running\n"
4745 	"\t    hist trigger and leave its current paused/active state\n"
4746 	"\t    unchanged.\n\n"
4747 	"\t    The enable_hist and disable_hist triggers can be used to\n"
4748 	"\t    have one event conditionally start and stop another event's\n"
4749 	"\t    already-attached hist trigger.  The syntax is analagous to\n"
4750 	"\t    the enable_event and disable_event triggers.\n"
4751 #endif
4752 ;
4753 
4754 static ssize_t
4755 tracing_readme_read(struct file *filp, char __user *ubuf,
4756 		       size_t cnt, loff_t *ppos)
4757 {
4758 	return simple_read_from_buffer(ubuf, cnt, ppos,
4759 					readme_msg, strlen(readme_msg));
4760 }
4761 
4762 static const struct file_operations tracing_readme_fops = {
4763 	.open		= tracing_open_generic,
4764 	.read		= tracing_readme_read,
4765 	.llseek		= generic_file_llseek,
4766 };
4767 
4768 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4769 {
4770 	int *ptr = v;
4771 
4772 	if (*pos || m->count)
4773 		ptr++;
4774 
4775 	(*pos)++;
4776 
4777 	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4778 		if (trace_find_tgid(*ptr))
4779 			return ptr;
4780 	}
4781 
4782 	return NULL;
4783 }
4784 
4785 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4786 {
4787 	void *v;
4788 	loff_t l = 0;
4789 
4790 	if (!tgid_map)
4791 		return NULL;
4792 
4793 	v = &tgid_map[0];
4794 	while (l <= *pos) {
4795 		v = saved_tgids_next(m, v, &l);
4796 		if (!v)
4797 			return NULL;
4798 	}
4799 
4800 	return v;
4801 }
4802 
4803 static void saved_tgids_stop(struct seq_file *m, void *v)
4804 {
4805 }
4806 
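/*
 * tgid_map is indexed by pid, so the pid is recovered from the iterator
 * position by plain pointer arithmetic against the array base.
 */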
4807 static int saved_tgids_show(struct seq_file *m, void *v)
4808 {
4809 	int pid = (int *)v - tgid_map;
4810 
4811 	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4812 	return 0;
4813 }
4814 
4815 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4816 	.start		= saved_tgids_start,
4817 	.stop		= saved_tgids_stop,
4818 	.next		= saved_tgids_next,
4819 	.show		= saved_tgids_show,
4820 };
4821 
4822 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4823 {
4824 	if (tracing_disabled)
4825 		return -ENODEV;
4826 
4827 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
4828 }
4829 
4830 
4831 static const struct file_operations tracing_saved_tgids_fops = {
4832 	.open		= tracing_saved_tgids_open,
4833 	.read		= seq_read,
4834 	.llseek		= seq_lseek,
4835 	.release	= seq_release,
4836 };
4837 
4838 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4839 {
4840 	unsigned int *ptr = v;
4841 
4842 	if (*pos || m->count)
4843 		ptr++;
4844 
4845 	(*pos)++;
4846 
4847 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4848 	     ptr++) {
4849 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4850 			continue;
4851 
4852 		return ptr;
4853 	}
4854 
4855 	return NULL;
4856 }
4857 
4858 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4859 {
4860 	void *v;
4861 	loff_t l = 0;
4862 
4863 	preempt_disable();
4864 	arch_spin_lock(&trace_cmdline_lock);
4865 
4866 	v = &savedcmd->map_cmdline_to_pid[0];
4867 	while (l <= *pos) {
4868 		v = saved_cmdlines_next(m, v, &l);
4869 		if (!v)
4870 			return NULL;
4871 	}
4872 
4873 	return v;
4874 }
4875 
4876 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4877 {
4878 	arch_spin_unlock(&trace_cmdline_lock);
4879 	preempt_enable();
4880 }
4881 
4882 static int saved_cmdlines_show(struct seq_file *m, void *v)
4883 {
4884 	char buf[TASK_COMM_LEN];
4885 	unsigned int *pid = v;
4886 
4887 	__trace_find_cmdline(*pid, buf);
4888 	seq_printf(m, "%d %s\n", *pid, buf);
4889 	return 0;
4890 }
4891 
4892 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4893 	.start		= saved_cmdlines_start,
4894 	.next		= saved_cmdlines_next,
4895 	.stop		= saved_cmdlines_stop,
4896 	.show		= saved_cmdlines_show,
4897 };
4898 
4899 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4900 {
4901 	if (tracing_disabled)
4902 		return -ENODEV;
4903 
4904 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4905 }
4906 
4907 static const struct file_operations tracing_saved_cmdlines_fops = {
4908 	.open		= tracing_saved_cmdlines_open,
4909 	.read		= seq_read,
4910 	.llseek		= seq_lseek,
4911 	.release	= seq_release,
4912 };
4913 
4914 static ssize_t
4915 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4916 				 size_t cnt, loff_t *ppos)
4917 {
4918 	char buf[64];
4919 	int r;
4920 
4921 	arch_spin_lock(&trace_cmdline_lock);
4922 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4923 	arch_spin_unlock(&trace_cmdline_lock);
4924 
4925 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4926 }
4927 
4928 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4929 {
4930 	kfree(s->saved_cmdlines);
4931 	kfree(s->map_cmdline_to_pid);
4932 	kfree(s);
4933 }
4934 
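/*
 * The new cmdline buffer is swapped in under trace_cmdline_lock so
 * lookups never see a half-initialized table; the old buffer is only
 * freed once the lock has been dropped.
 */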
4935 static int tracing_resize_saved_cmdlines(unsigned int val)
4936 {
4937 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4938 
4939 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4940 	if (!s)
4941 		return -ENOMEM;
4942 
4943 	if (allocate_cmdlines_buffer(val, s) < 0) {
4944 		kfree(s);
4945 		return -ENOMEM;
4946 	}
4947 
4948 	arch_spin_lock(&trace_cmdline_lock);
4949 	savedcmd_temp = savedcmd;
4950 	savedcmd = s;
4951 	arch_spin_unlock(&trace_cmdline_lock);
4952 	free_saved_cmdlines_buffer(savedcmd_temp);
4953 
4954 	return 0;
4955 }
4956 
4957 static ssize_t
4958 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4959 				  size_t cnt, loff_t *ppos)
4960 {
4961 	unsigned long val;
4962 	int ret;
4963 
4964 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4965 	if (ret)
4966 		return ret;
4967 
4968 	/* must have at least 1 entry and no more than PID_MAX_DEFAULT */
4969 	if (!val || val > PID_MAX_DEFAULT)
4970 		return -EINVAL;
4971 
4972 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4973 	if (ret < 0)
4974 		return ret;
4975 
4976 	*ppos += cnt;
4977 
4978 	return cnt;
4979 }
4980 
4981 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4982 	.open		= tracing_open_generic,
4983 	.read		= tracing_saved_cmdlines_size_read,
4984 	.write		= tracing_saved_cmdlines_size_write,
4985 };
4986 
4987 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4988 static union trace_eval_map_item *
4989 update_eval_map(union trace_eval_map_item *ptr)
4990 {
4991 	if (!ptr->map.eval_string) {
4992 		if (ptr->tail.next) {
4993 			ptr = ptr->tail.next;
4994 			/* Set ptr to the next real item (skip head) */
4995 			ptr++;
4996 		} else
4997 			return NULL;
4998 	}
4999 	return ptr;
5000 }
5001 
5002 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5003 {
5004 	union trace_eval_map_item *ptr = v;
5005 
5006 	/*
5007 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5008 	 * This really should never happen.
5009 	 */
5010 	ptr = update_eval_map(ptr);
5011 	if (WARN_ON_ONCE(!ptr))
5012 		return NULL;
5013 
5014 	ptr++;
5015 
5016 	(*pos)++;
5017 
5018 	ptr = update_eval_map(ptr);
5019 
5020 	return ptr;
5021 }
5022 
5023 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5024 {
5025 	union trace_eval_map_item *v;
5026 	loff_t l = 0;
5027 
5028 	mutex_lock(&trace_eval_mutex);
5029 
5030 	v = trace_eval_maps;
5031 	if (v)
5032 		v++;
5033 
5034 	while (v && l < *pos) {
5035 		v = eval_map_next(m, v, &l);
5036 	}
5037 
5038 	return v;
5039 }
5040 
5041 static void eval_map_stop(struct seq_file *m, void *v)
5042 {
5043 	mutex_unlock(&trace_eval_mutex);
5044 }
5045 
5046 static int eval_map_show(struct seq_file *m, void *v)
5047 {
5048 	union trace_eval_map_item *ptr = v;
5049 
5050 	seq_printf(m, "%s %ld (%s)\n",
5051 		   ptr->map.eval_string, ptr->map.eval_value,
5052 		   ptr->map.system);
5053 
5054 	return 0;
5055 }
5056 
5057 static const struct seq_operations tracing_eval_map_seq_ops = {
5058 	.start		= eval_map_start,
5059 	.next		= eval_map_next,
5060 	.stop		= eval_map_stop,
5061 	.show		= eval_map_show,
5062 };
5063 
5064 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5065 {
5066 	if (tracing_disabled)
5067 		return -ENODEV;
5068 
5069 	return seq_open(filp, &tracing_eval_map_seq_ops);
5070 }
5071 
5072 static const struct file_operations tracing_eval_map_fops = {
5073 	.open		= tracing_eval_map_open,
5074 	.read		= seq_read,
5075 	.llseek		= seq_lseek,
5076 	.release	= seq_release,
5077 };
5078 
5079 static inline union trace_eval_map_item *
5080 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5081 {
5082 	/* Return tail of array given the head */
5083 	return ptr + ptr->head.length + 1;
5084 }
5085 
5086 static void
5087 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5088 			   int len)
5089 {
5090 	struct trace_eval_map **stop;
5091 	struct trace_eval_map **map;
5092 	union trace_eval_map_item *map_array;
5093 	union trace_eval_map_item *ptr;
5094 
5095 	stop = start + len;
5096 
5097 	/*
5098 	 * The trace_eval_maps contains the map plus a head and tail item,
5099 	 * where the head holds the module and length of array, and the
5100 	 * tail holds a pointer to the next list.
5101 	 */
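	/*
	 * Roughly:
	 *   map_array[0]          head  (mod, length = len)
	 *   map_array[1..len]     copies of the module's eval maps
	 *   map_array[len + 1]    tail  (zeroed; tail.next links the next block)
	 */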
5102 	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
5103 	if (!map_array) {
5104 		pr_warn("Unable to allocate trace eval mapping\n");
5105 		return;
5106 	}
5107 
5108 	mutex_lock(&trace_eval_mutex);
5109 
5110 	if (!trace_eval_maps)
5111 		trace_eval_maps = map_array;
5112 	else {
5113 		ptr = trace_eval_maps;
5114 		for (;;) {
5115 			ptr = trace_eval_jmp_to_tail(ptr);
5116 			if (!ptr->tail.next)
5117 				break;
5118 			ptr = ptr->tail.next;
5119 
5120 		}
5121 		ptr->tail.next = map_array;
5122 	}
5123 	map_array->head.mod = mod;
5124 	map_array->head.length = len;
5125 	map_array++;
5126 
5127 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5128 		map_array->map = **map;
5129 		map_array++;
5130 	}
5131 	memset(map_array, 0, sizeof(*map_array));
5132 
5133 	mutex_unlock(&trace_eval_mutex);
5134 }
5135 
5136 static void trace_create_eval_file(struct dentry *d_tracer)
5137 {
5138 	trace_create_file("eval_map", 0444, d_tracer,
5139 			  NULL, &tracing_eval_map_fops);
5140 }
5141 
5142 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5143 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5144 static inline void trace_insert_eval_map_file(struct module *mod,
5145 			      struct trace_eval_map **start, int len) { }
5146 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5147 
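/*
 * trace_event_eval_update() replaces the eval/enum symbols in the event
 * print formats with their values, and trace_insert_eval_map_file()
 * (when configured) additionally exposes the mappings via "eval_map".
 */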
5148 static void trace_insert_eval_map(struct module *mod,
5149 				  struct trace_eval_map **start, int len)
5150 {
5151 	struct trace_eval_map **map;
5152 
5153 	if (len <= 0)
5154 		return;
5155 
5156 	map = start;
5157 
5158 	trace_event_eval_update(map, len);
5159 
5160 	trace_insert_eval_map_file(mod, start, len);
5161 }
5162 
5163 static ssize_t
5164 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5165 		       size_t cnt, loff_t *ppos)
5166 {
5167 	struct trace_array *tr = filp->private_data;
5168 	char buf[MAX_TRACER_SIZE+2];
5169 	int r;
5170 
5171 	mutex_lock(&trace_types_lock);
5172 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5173 	mutex_unlock(&trace_types_lock);
5174 
5175 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5176 }
5177 
5178 int tracer_init(struct tracer *t, struct trace_array *tr)
5179 {
5180 	tracing_reset_online_cpus(&tr->trace_buffer);
5181 	return t->init(tr);
5182 }
5183 
5184 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5185 {
5186 	int cpu;
5187 
5188 	for_each_tracing_cpu(cpu)
5189 		per_cpu_ptr(buf->data, cpu)->entries = val;
5190 }
5191 
5192 #ifdef CONFIG_TRACER_MAX_TRACE
5193 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5194 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5195 					struct trace_buffer *size_buf, int cpu_id)
5196 {
5197 	int cpu, ret = 0;
5198 
5199 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5200 		for_each_tracing_cpu(cpu) {
5201 			ret = ring_buffer_resize(trace_buf->buffer,
5202 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5203 			if (ret < 0)
5204 				break;
5205 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5206 				per_cpu_ptr(size_buf->data, cpu)->entries;
5207 		}
5208 	} else {
5209 		ret = ring_buffer_resize(trace_buf->buffer,
5210 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5211 		if (ret == 0)
5212 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5213 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5214 	}
5215 
5216 	return ret;
5217 }
5218 #endif /* CONFIG_TRACER_MAX_TRACE */
5219 
5220 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5221 					unsigned long size, int cpu)
5222 {
5223 	int ret;
5224 
5225 	/*
5226 	 * If kernel or user changes the size of the ring buffer
5227 	 * we use the size that was given, and we can forget about
5228 	 * expanding it later.
5229 	 */
5230 	ring_buffer_expanded = true;
5231 
5232 	/* May be called before buffers are initialized */
5233 	if (!tr->trace_buffer.buffer)
5234 		return 0;
5235 
5236 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5237 	if (ret < 0)
5238 		return ret;
5239 
5240 #ifdef CONFIG_TRACER_MAX_TRACE
5241 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5242 	    !tr->current_trace->use_max_tr)
5243 		goto out;
5244 
5245 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5246 	if (ret < 0) {
5247 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5248 						     &tr->trace_buffer, cpu);
5249 		if (r < 0) {
5250 			/*
5251 			 * AARGH! We are left with a different-
5252 			 * sized max buffer!
5253 			 * The max buffer is our "snapshot" buffer.
5254 			 * When a tracer needs a snapshot (one of the
5255 			 * latency tracers), it swaps the max buffer
5256 			 * with the saved snapshot. We succeeded in
5257 			 * updating the size of the main buffer, but failed to
5258 			 * update the size of the max buffer. But when we tried
5259 			 * to reset the main buffer to the original size, we
5260 			 * failed there too. This is very unlikely to
5261 			 * happen, but if it does, warn and kill all
5262 			 * tracing.
5263 			 */
5264 			WARN_ON(1);
5265 			tracing_disabled = 1;
5266 		}
5267 		return ret;
5268 	}
5269 
5270 	if (cpu == RING_BUFFER_ALL_CPUS)
5271 		set_buffer_entries(&tr->max_buffer, size);
5272 	else
5273 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5274 
5275  out:
5276 #endif /* CONFIG_TRACER_MAX_TRACE */
5277 
5278 	if (cpu == RING_BUFFER_ALL_CPUS)
5279 		set_buffer_entries(&tr->trace_buffer, size);
5280 	else
5281 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5282 
5283 	return ret;
5284 }
5285 
5286 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5287 					  unsigned long size, int cpu_id)
5288 {
5289 	int ret = size;
5290 
5291 	mutex_lock(&trace_types_lock);
5292 
5293 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5294 		/* make sure this cpu is enabled in the mask */
5295 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5296 			ret = -EINVAL;
5297 			goto out;
5298 		}
5299 	}
5300 
5301 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5302 	if (ret < 0)
5303 		ret = -ENOMEM;
5304 
5305 out:
5306 	mutex_unlock(&trace_types_lock);
5307 
5308 	return ret;
5309 }
5310 
5311 
5312 /**
5313  * tracing_update_buffers - used by tracing facility to expand ring buffers
5314  *
5315  * To save memory when tracing is never used on a system that has it
5316  * configured in, the ring buffers are set to a minimum size. Once
5317  * a user starts to use the tracing facility, they need to grow
5318  * to their default size.
5319  *
5320  * This function is to be called when a tracer is about to be used.
5321  */
5322 int tracing_update_buffers(void)
5323 {
5324 	int ret = 0;
5325 
5326 	mutex_lock(&trace_types_lock);
5327 	if (!ring_buffer_expanded)
5328 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5329 						RING_BUFFER_ALL_CPUS);
5330 	mutex_unlock(&trace_types_lock);
5331 
5332 	return ret;
5333 }
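
/*
 * Minimal usage sketch (illustrative only; my_feature_enable() is a made-up
 * caller): a facility that is about to start generating trace data expands
 * the ring buffers first and bails out if that fails.
 *
 *	int my_feature_enable(void)
 *	{
 *		int ret;
 *
 *		ret = tracing_update_buffers();
 *		if (ret < 0)
 *			return ret;
 *
 *		return 0;	// buffers are now at their full default size
 *	}
 */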
5334 
5335 struct trace_option_dentry;
5336 
5337 static void
5338 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5339 
5340 /*
5341  * Used to clear out the tracer before deletion of an instance.
5342  * Must have trace_types_lock held.
5343  */
5344 static void tracing_set_nop(struct trace_array *tr)
5345 {
5346 	if (tr->current_trace == &nop_trace)
5347 		return;
5348 
5349 	tr->current_trace->enabled--;
5350 
5351 	if (tr->current_trace->reset)
5352 		tr->current_trace->reset(tr);
5353 
5354 	tr->current_trace = &nop_trace;
5355 }
5356 
5357 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5358 {
5359 	/* Only enable if the directory has been created already. */
5360 	if (!tr->dir)
5361 		return;
5362 
5363 	create_trace_option_files(tr, t);
5364 }
5365 
5366 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5367 {
5368 	struct tracer *t;
5369 #ifdef CONFIG_TRACER_MAX_TRACE
5370 	bool had_max_tr;
5371 #endif
5372 	int ret = 0;
5373 
5374 	mutex_lock(&trace_types_lock);
5375 
5376 	if (!ring_buffer_expanded) {
5377 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5378 						RING_BUFFER_ALL_CPUS);
5379 		if (ret < 0)
5380 			goto out;
5381 		ret = 0;
5382 	}
5383 
5384 	for (t = trace_types; t; t = t->next) {
5385 		if (strcmp(t->name, buf) == 0)
5386 			break;
5387 	}
5388 	if (!t) {
5389 		ret = -EINVAL;
5390 		goto out;
5391 	}
5392 	if (t == tr->current_trace)
5393 		goto out;
5394 
5395 	/* Some tracers cannot be enabled from the kernel command line */
5396 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5397 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5398 			t->name);
5399 		goto out;
5400 	}
5401 
5402 	/* Some tracers are only allowed for the top level buffer */
5403 	if (!trace_ok_for_array(t, tr)) {
5404 		ret = -EINVAL;
5405 		goto out;
5406 	}
5407 
5408 	/* If trace pipe files are being read, we can't change the tracer */
5409 	if (tr->current_trace->ref) {
5410 		ret = -EBUSY;
5411 		goto out;
5412 	}
5413 
5414 	trace_branch_disable();
5415 
5416 	tr->current_trace->enabled--;
5417 
5418 	if (tr->current_trace->reset)
5419 		tr->current_trace->reset(tr);
5420 
5421 	/* Current trace needs to be nop_trace before synchronize_sched */
5422 	tr->current_trace = &nop_trace;
5423 
5424 #ifdef CONFIG_TRACER_MAX_TRACE
5425 	had_max_tr = tr->allocated_snapshot;
5426 
5427 	if (had_max_tr && !t->use_max_tr) {
5428 		/*
5429 		 * We need to make sure that the update_max_tr sees that
5430 		 * current_trace changed to nop_trace to keep it from
5431 		 * swapping the buffers after we resize it.
5432 		 * update_max_tr() is called with interrupts disabled,
5433 		 * so a synchronize_sched() is sufficient.
5434 		 */
5435 		synchronize_sched();
5436 		free_snapshot(tr);
5437 	}
5438 #endif
5439 
5440 #ifdef CONFIG_TRACER_MAX_TRACE
5441 	if (t->use_max_tr && !had_max_tr) {
5442 		ret = tracing_alloc_snapshot_instance(tr);
5443 		if (ret < 0)
5444 			goto out;
5445 	}
5446 #endif
5447 
5448 	if (t->init) {
5449 		ret = tracer_init(t, tr);
5450 		if (ret)
5451 			goto out;
5452 	}
5453 
5454 	tr->current_trace = t;
5455 	tr->current_trace->enabled++;
5456 	trace_branch_enable(tr);
5457  out:
5458 	mutex_unlock(&trace_types_lock);
5459 
5460 	return ret;
5461 }
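
/*
 * Illustrative call sequence (a sketch, not code from this file): writing a
 * tracer name to the "current_tracer" file lands here via
 * tracing_set_trace_write(), e.g.
 *
 *	err = tracing_set_tracer(tr, "nop");
 *
 * which expands the buffers if needed, tears down the previous tracer,
 * frees or allocates the snapshot buffer depending on ->use_max_tr, and
 * finally calls the new tracer's init() callback.
 */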
5462 
5463 static ssize_t
5464 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5465 			size_t cnt, loff_t *ppos)
5466 {
5467 	struct trace_array *tr = filp->private_data;
5468 	char buf[MAX_TRACER_SIZE+1];
5469 	int i;
5470 	size_t ret;
5471 	int err;
5472 
5473 	ret = cnt;
5474 
5475 	if (cnt > MAX_TRACER_SIZE)
5476 		cnt = MAX_TRACER_SIZE;
5477 
5478 	if (copy_from_user(buf, ubuf, cnt))
5479 		return -EFAULT;
5480 
5481 	buf[cnt] = 0;
5482 
5483 	/* strip trailing whitespace. */
5484 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5485 		buf[i] = 0;
5486 
5487 	err = tracing_set_tracer(tr, buf);
5488 	if (err)
5489 		return err;
5490 
5491 	*ppos += ret;
5492 
5493 	return ret;
5494 }
5495 
5496 static ssize_t
5497 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5498 		   size_t cnt, loff_t *ppos)
5499 {
5500 	char buf[64];
5501 	int r;
5502 
5503 	r = snprintf(buf, sizeof(buf), "%ld\n",
5504 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5505 	if (r > sizeof(buf))
5506 		r = sizeof(buf);
5507 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5508 }
5509 
5510 static ssize_t
5511 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5512 		    size_t cnt, loff_t *ppos)
5513 {
5514 	unsigned long val;
5515 	int ret;
5516 
5517 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5518 	if (ret)
5519 		return ret;
5520 
5521 	*ptr = val * 1000;
5522 
5523 	return cnt;
5524 }
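
/*
 * Unit handling, spelled out (illustrative): the value written from
 * userspace is taken in microseconds and stored in nanoseconds, so writing
 * "250" to tracing_thresh stores 250 * 1000 = 250000. The read side above
 * converts back with nsecs_to_usecs().
 */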
5525 
5526 static ssize_t
5527 tracing_thresh_read(struct file *filp, char __user *ubuf,
5528 		    size_t cnt, loff_t *ppos)
5529 {
5530 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5531 }
5532 
5533 static ssize_t
5534 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5535 		     size_t cnt, loff_t *ppos)
5536 {
5537 	struct trace_array *tr = filp->private_data;
5538 	int ret;
5539 
5540 	mutex_lock(&trace_types_lock);
5541 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5542 	if (ret < 0)
5543 		goto out;
5544 
5545 	if (tr->current_trace->update_thresh) {
5546 		ret = tr->current_trace->update_thresh(tr);
5547 		if (ret < 0)
5548 			goto out;
5549 	}
5550 
5551 	ret = cnt;
5552 out:
5553 	mutex_unlock(&trace_types_lock);
5554 
5555 	return ret;
5556 }
5557 
5558 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5559 
5560 static ssize_t
5561 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5562 		     size_t cnt, loff_t *ppos)
5563 {
5564 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5565 }
5566 
5567 static ssize_t
5568 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5569 		      size_t cnt, loff_t *ppos)
5570 {
5571 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5572 }
5573 
5574 #endif
5575 
5576 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5577 {
5578 	struct trace_array *tr = inode->i_private;
5579 	struct trace_iterator *iter;
5580 	int ret = 0;
5581 
5582 	if (tracing_disabled)
5583 		return -ENODEV;
5584 
5585 	if (trace_array_get(tr) < 0)
5586 		return -ENODEV;
5587 
5588 	mutex_lock(&trace_types_lock);
5589 
5590 	/* create a buffer to store the information to pass to userspace */
5591 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5592 	if (!iter) {
5593 		ret = -ENOMEM;
5594 		__trace_array_put(tr);
5595 		goto out;
5596 	}
5597 
5598 	trace_seq_init(&iter->seq);
5599 	iter->trace = tr->current_trace;
5600 
5601 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5602 		ret = -ENOMEM;
5603 		goto fail;
5604 	}
5605 
5606 	/* trace pipe does not show start of buffer */
5607 	cpumask_setall(iter->started);
5608 
5609 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5610 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
5611 
5612 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
5613 	if (trace_clocks[tr->clock_id].in_ns)
5614 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5615 
5616 	iter->tr = tr;
5617 	iter->trace_buffer = &tr->trace_buffer;
5618 	iter->cpu_file = tracing_get_cpu(inode);
5619 	mutex_init(&iter->mutex);
5620 	filp->private_data = iter;
5621 
5622 	if (iter->trace->pipe_open)
5623 		iter->trace->pipe_open(iter);
5624 
5625 	nonseekable_open(inode, filp);
5626 
5627 	tr->current_trace->ref++;
5628 out:
5629 	mutex_unlock(&trace_types_lock);
5630 	return ret;
5631 
5632 fail:
5633 	kfree(iter);
5634 	__trace_array_put(tr);
5635 	mutex_unlock(&trace_types_lock);
5636 	return ret;
5637 }
5638 
5639 static int tracing_release_pipe(struct inode *inode, struct file *file)
5640 {
5641 	struct trace_iterator *iter = file->private_data;
5642 	struct trace_array *tr = inode->i_private;
5643 
5644 	mutex_lock(&trace_types_lock);
5645 
5646 	tr->current_trace->ref--;
5647 
5648 	if (iter->trace->pipe_close)
5649 		iter->trace->pipe_close(iter);
5650 
5651 	mutex_unlock(&trace_types_lock);
5652 
5653 	free_cpumask_var(iter->started);
5654 	mutex_destroy(&iter->mutex);
5655 	kfree(iter);
5656 
5657 	trace_array_put(tr);
5658 
5659 	return 0;
5660 }
5661 
5662 static unsigned int
5663 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5664 {
5665 	struct trace_array *tr = iter->tr;
5666 
5667 	/* Iterators are static; they should be either filled or empty */
5668 	if (trace_buffer_iter(iter, iter->cpu_file))
5669 		return POLLIN | POLLRDNORM;
5670 
5671 	if (tr->trace_flags & TRACE_ITER_BLOCK)
5672 		/*
5673 		 * Always select as readable when in blocking mode
5674 		 */
5675 		return POLLIN | POLLRDNORM;
5676 	else
5677 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5678 					     filp, poll_table);
5679 }
5680 
5681 static unsigned int
5682 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5683 {
5684 	struct trace_iterator *iter = filp->private_data;
5685 
5686 	return trace_poll(iter, filp, poll_table);
5687 }
5688 
5689 /* Must be called with iter->mutex held. */
5690 static int tracing_wait_pipe(struct file *filp)
5691 {
5692 	struct trace_iterator *iter = filp->private_data;
5693 	int ret;
5694 
5695 	while (trace_empty(iter)) {
5696 
5697 		if ((filp->f_flags & O_NONBLOCK)) {
5698 			return -EAGAIN;
5699 		}
5700 
5701 		/*
5702 		 * We block until we read something and tracing is disabled.
5703 		 * We still block if tracing is disabled, but we have never
5704 		 * read anything. This allows a user to cat this file, and
5705 		 * then enable tracing. But after we have read something,
5706 		 * we give an EOF when tracing is again disabled.
5707 		 *
5708 		 * iter->pos will be 0 if we haven't read anything.
5709 		 */
5710 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5711 			break;
5712 
5713 		mutex_unlock(&iter->mutex);
5714 
5715 		ret = wait_on_pipe(iter, false);
5716 
5717 		mutex_lock(&iter->mutex);
5718 
5719 		if (ret)
5720 			return ret;
5721 	}
5722 
5723 	return 1;
5724 }
5725 
5726 /*
5727  * Consumer reader.
5728  */
5729 static ssize_t
5730 tracing_read_pipe(struct file *filp, char __user *ubuf,
5731 		  size_t cnt, loff_t *ppos)
5732 {
5733 	struct trace_iterator *iter = filp->private_data;
5734 	ssize_t sret;
5735 
5736 	/*
5737 	 * Avoid more than one consumer on a single file descriptor
5738 	 * This is just a matter of trace coherency; the ring buffer itself
5739 	 * is protected.
5740 	 */
5741 	mutex_lock(&iter->mutex);
5742 
5743 	/* return any leftover data */
5744 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5745 	if (sret != -EBUSY)
5746 		goto out;
5747 
5748 	trace_seq_init(&iter->seq);
5749 
5750 	if (iter->trace->read) {
5751 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5752 		if (sret)
5753 			goto out;
5754 	}
5755 
5756 waitagain:
5757 	sret = tracing_wait_pipe(filp);
5758 	if (sret <= 0)
5759 		goto out;
5760 
5761 	/* stop when tracing is finished */
5762 	if (trace_empty(iter)) {
5763 		sret = 0;
5764 		goto out;
5765 	}
5766 
5767 	if (cnt >= PAGE_SIZE)
5768 		cnt = PAGE_SIZE - 1;
5769 
5770 	/* reset all but tr, trace, and overruns */
5771 	memset(&iter->seq, 0,
5772 	       sizeof(struct trace_iterator) -
5773 	       offsetof(struct trace_iterator, seq));
5774 	cpumask_clear(iter->started);
5775 	trace_seq_init(&iter->seq);
5776 	iter->pos = -1;
5777 
5778 	trace_event_read_lock();
5779 	trace_access_lock(iter->cpu_file);
5780 	while (trace_find_next_entry_inc(iter) != NULL) {
5781 		enum print_line_t ret;
5782 		int save_len = iter->seq.seq.len;
5783 
5784 		ret = print_trace_line(iter);
5785 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5786 			/* don't print partial lines */
5787 			iter->seq.seq.len = save_len;
5788 			break;
5789 		}
5790 		if (ret != TRACE_TYPE_NO_CONSUME)
5791 			trace_consume(iter);
5792 
5793 		if (trace_seq_used(&iter->seq) >= cnt)
5794 			break;
5795 
5796 		/*
5797 		 * Setting the full flag means we reached the trace_seq buffer
5798 		 * size and should have left via the partial-line condition above.
5799 		 * One of the trace_seq_* functions is not being used properly.
5800 		 */
5801 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5802 			  iter->ent->type);
5803 	}
5804 	trace_access_unlock(iter->cpu_file);
5805 	trace_event_read_unlock();
5806 
5807 	/* Now copy what we have to the user */
5808 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5809 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5810 		trace_seq_init(&iter->seq);
5811 
5812 	/*
5813 	 * If there was nothing to send to user, in spite of consuming trace
5814 	 * entries, go back to wait for more entries.
5815 	 */
5816 	if (sret == -EBUSY)
5817 		goto waitagain;
5818 
5819 out:
5820 	mutex_unlock(&iter->mutex);
5821 
5822 	return sret;
5823 }
5824 
5825 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5826 				     unsigned int idx)
5827 {
5828 	__free_page(spd->pages[idx]);
5829 }
5830 
5831 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5832 	.can_merge		= 0,
5833 	.confirm		= generic_pipe_buf_confirm,
5834 	.release		= generic_pipe_buf_release,
5835 	.steal			= generic_pipe_buf_steal,
5836 	.get			= generic_pipe_buf_get,
5837 };
5838 
5839 static size_t
5840 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5841 {
5842 	size_t count;
5843 	int save_len;
5844 	int ret;
5845 
5846 	/* Seq buffer is page-sized, exactly what we need. */
5847 	for (;;) {
5848 		save_len = iter->seq.seq.len;
5849 		ret = print_trace_line(iter);
5850 
5851 		if (trace_seq_has_overflowed(&iter->seq)) {
5852 			iter->seq.seq.len = save_len;
5853 			break;
5854 		}
5855 
5856 		/*
5857 		 * This should not be hit, because it should only
5858 		 * be set if the iter->seq overflowed. But check it
5859 		 * anyway to be safe.
5860 		 */
5861 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
5862 			iter->seq.seq.len = save_len;
5863 			break;
5864 		}
5865 
5866 		count = trace_seq_used(&iter->seq) - save_len;
5867 		if (rem < count) {
5868 			rem = 0;
5869 			iter->seq.seq.len = save_len;
5870 			break;
5871 		}
5872 
5873 		if (ret != TRACE_TYPE_NO_CONSUME)
5874 			trace_consume(iter);
5875 		rem -= count;
5876 		if (!trace_find_next_entry_inc(iter))	{
5877 			rem = 0;
5878 			iter->ent = NULL;
5879 			break;
5880 		}
5881 	}
5882 
5883 	return rem;
5884 }
5885 
5886 static ssize_t tracing_splice_read_pipe(struct file *filp,
5887 					loff_t *ppos,
5888 					struct pipe_inode_info *pipe,
5889 					size_t len,
5890 					unsigned int flags)
5891 {
5892 	struct page *pages_def[PIPE_DEF_BUFFERS];
5893 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5894 	struct trace_iterator *iter = filp->private_data;
5895 	struct splice_pipe_desc spd = {
5896 		.pages		= pages_def,
5897 		.partial	= partial_def,
5898 		.nr_pages	= 0, /* This gets updated below. */
5899 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5900 		.ops		= &tracing_pipe_buf_ops,
5901 		.spd_release	= tracing_spd_release_pipe,
5902 	};
5903 	ssize_t ret;
5904 	size_t rem;
5905 	unsigned int i;
5906 
5907 	if (splice_grow_spd(pipe, &spd))
5908 		return -ENOMEM;
5909 
5910 	mutex_lock(&iter->mutex);
5911 
5912 	if (iter->trace->splice_read) {
5913 		ret = iter->trace->splice_read(iter, filp,
5914 					       ppos, pipe, len, flags);
5915 		if (ret)
5916 			goto out_err;
5917 	}
5918 
5919 	ret = tracing_wait_pipe(filp);
5920 	if (ret <= 0)
5921 		goto out_err;
5922 
5923 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5924 		ret = -EFAULT;
5925 		goto out_err;
5926 	}
5927 
5928 	trace_event_read_lock();
5929 	trace_access_lock(iter->cpu_file);
5930 
5931 	/* Fill as many pages as possible. */
5932 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5933 		spd.pages[i] = alloc_page(GFP_KERNEL);
5934 		if (!spd.pages[i])
5935 			break;
5936 
5937 		rem = tracing_fill_pipe_page(rem, iter);
5938 
5939 		/* Copy the data into the page, so we can start over. */
5940 		ret = trace_seq_to_buffer(&iter->seq,
5941 					  page_address(spd.pages[i]),
5942 					  trace_seq_used(&iter->seq));
5943 		if (ret < 0) {
5944 			__free_page(spd.pages[i]);
5945 			break;
5946 		}
5947 		spd.partial[i].offset = 0;
5948 		spd.partial[i].len = trace_seq_used(&iter->seq);
5949 
5950 		trace_seq_init(&iter->seq);
5951 	}
5952 
5953 	trace_access_unlock(iter->cpu_file);
5954 	trace_event_read_unlock();
5955 	mutex_unlock(&iter->mutex);
5956 
5957 	spd.nr_pages = i;
5958 
5959 	if (i)
5960 		ret = splice_to_pipe(pipe, &spd);
5961 	else
5962 		ret = 0;
5963 out:
5964 	splice_shrink_spd(&spd);
5965 	return ret;
5966 
5967 out_err:
5968 	mutex_unlock(&iter->mutex);
5969 	goto out;
5970 }
5971 
5972 static ssize_t
5973 tracing_entries_read(struct file *filp, char __user *ubuf,
5974 		     size_t cnt, loff_t *ppos)
5975 {
5976 	struct inode *inode = file_inode(filp);
5977 	struct trace_array *tr = inode->i_private;
5978 	int cpu = tracing_get_cpu(inode);
5979 	char buf[64];
5980 	int r = 0;
5981 	ssize_t ret;
5982 
5983 	mutex_lock(&trace_types_lock);
5984 
5985 	if (cpu == RING_BUFFER_ALL_CPUS) {
5986 		int cpu, buf_size_same;
5987 		unsigned long size;
5988 
5989 		size = 0;
5990 		buf_size_same = 1;
5991 		/* check if all cpu buffer sizes are the same */
5992 		for_each_tracing_cpu(cpu) {
5993 			/* fill in the size from first enabled cpu */
5994 			if (size == 0)
5995 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5996 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5997 				buf_size_same = 0;
5998 				break;
5999 			}
6000 		}
6001 
6002 		if (buf_size_same) {
6003 			if (!ring_buffer_expanded)
6004 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6005 					    size >> 10,
6006 					    trace_buf_size >> 10);
6007 			else
6008 				r = sprintf(buf, "%lu\n", size >> 10);
6009 		} else
6010 			r = sprintf(buf, "X\n");
6011 	} else
6012 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6013 
6014 	mutex_unlock(&trace_types_lock);
6015 
6016 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6017 	return ret;
6018 }
6019 
6020 static ssize_t
6021 tracing_entries_write(struct file *filp, const char __user *ubuf,
6022 		      size_t cnt, loff_t *ppos)
6023 {
6024 	struct inode *inode = file_inode(filp);
6025 	struct trace_array *tr = inode->i_private;
6026 	unsigned long val;
6027 	int ret;
6028 
6029 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6030 	if (ret)
6031 		return ret;
6032 
6033 	/* must have at least 1 entry */
6034 	if (!val)
6035 		return -EINVAL;
6036 
6037 	/* value is in KB */
6038 	val <<= 10;
6039 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6040 	if (ret < 0)
6041 		return ret;
6042 
6043 	*ppos += cnt;
6044 
6045 	return cnt;
6046 }
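
/*
 * Size conversion, spelled out (illustrative): the value written to
 * buffer_size_kb is in kilobytes, so writing "1408" requests
 * 1408 << 10 = 1441792 bytes for every CPU (or for a single CPU when the
 * per_cpu buffer_size_kb file is written). The read side above reports
 * entries >> 10 to convert back to KB.
 */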
6047 
6048 static ssize_t
6049 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6050 				size_t cnt, loff_t *ppos)
6051 {
6052 	struct trace_array *tr = filp->private_data;
6053 	char buf[64];
6054 	int r, cpu;
6055 	unsigned long size = 0, expanded_size = 0;
6056 
6057 	mutex_lock(&trace_types_lock);
6058 	for_each_tracing_cpu(cpu) {
6059 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6060 		if (!ring_buffer_expanded)
6061 			expanded_size += trace_buf_size >> 10;
6062 	}
6063 	if (ring_buffer_expanded)
6064 		r = sprintf(buf, "%lu\n", size);
6065 	else
6066 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6067 	mutex_unlock(&trace_types_lock);
6068 
6069 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6070 }
6071 
6072 static ssize_t
6073 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6074 			  size_t cnt, loff_t *ppos)
6075 {
6076 	/*
6077 	 * There is no need to read what the user has written; this function
6078 	 * exists only so that using "echo" on the file does not report an error.
6079 	 */
6080 
6081 	*ppos += cnt;
6082 
6083 	return cnt;
6084 }
6085 
6086 static int
6087 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6088 {
6089 	struct trace_array *tr = inode->i_private;
6090 
6091 	/* disable tracing ? */
6092 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6093 		tracer_tracing_off(tr);
6094 	/* resize the ring buffer to 0 */
6095 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6096 
6097 	trace_array_put(tr);
6098 
6099 	return 0;
6100 }
6101 
6102 static ssize_t
6103 tracing_mark_write(struct file *filp, const char __user *ubuf,
6104 					size_t cnt, loff_t *fpos)
6105 {
6106 	struct trace_array *tr = filp->private_data;
6107 	struct ring_buffer_event *event;
6108 	struct ring_buffer *buffer;
6109 	struct print_entry *entry;
6110 	unsigned long irq_flags;
6111 	const char faulted[] = "<faulted>";
6112 	ssize_t written;
6113 	int size;
6114 	int len;
6115 
6116 /* Used in tracing_mark_raw_write() as well */
6117 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6118 
6119 	if (tracing_disabled)
6120 		return -EINVAL;
6121 
6122 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6123 		return -EINVAL;
6124 
6125 	if (cnt > TRACE_BUF_SIZE)
6126 		cnt = TRACE_BUF_SIZE;
6127 
6128 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6129 
6130 	local_save_flags(irq_flags);
6131 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6132 
6133 	/* If less than "<faulted>", then make sure we can still add that */
6134 	if (cnt < FAULTED_SIZE)
6135 		size += FAULTED_SIZE - cnt;
6136 
6137 	buffer = tr->trace_buffer.buffer;
6138 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6139 					    irq_flags, preempt_count());
6140 	if (unlikely(!event))
6141 		/* Ring buffer disabled, return as if not open for write */
6142 		return -EBADF;
6143 
6144 	entry = ring_buffer_event_data(event);
6145 	entry->ip = _THIS_IP_;
6146 
6147 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6148 	if (len) {
6149 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6150 		cnt = FAULTED_SIZE;
6151 		written = -EFAULT;
6152 	} else
6153 		written = cnt;
6154 	len = cnt;
6155 
6156 	if (entry->buf[cnt - 1] != '\n') {
6157 		entry->buf[cnt] = '\n';
6158 		entry->buf[cnt + 1] = '\0';
6159 	} else
6160 		entry->buf[cnt] = '\0';
6161 
6162 	__buffer_unlock_commit(buffer, event);
6163 
6164 	if (written > 0)
6165 		*fpos += written;
6166 
6167 	return written;
6168 }
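
/*
 * Userspace usage sketch (illustrative; the tracefs mount point is an
 * assumption, typically /sys/kernel/tracing or /sys/kernel/debug/tracing):
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "hello from userspace", 20);
 *		close(fd);
 *	}
 *
 * The string shows up in the trace as a TRACE_PRINT entry; a trailing
 * newline is appended here if the writer did not supply one, and a copy
 * that faults is replaced by the literal string "<faulted>".
 */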
6169 
6170 /* Limit it for now to 3K (including tag) */
6171 #define RAW_DATA_MAX_SIZE (1024*3)
6172 
6173 static ssize_t
6174 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6175 					size_t cnt, loff_t *fpos)
6176 {
6177 	struct trace_array *tr = filp->private_data;
6178 	struct ring_buffer_event *event;
6179 	struct ring_buffer *buffer;
6180 	struct raw_data_entry *entry;
6181 	const char faulted[] = "<faulted>";
6182 	unsigned long irq_flags;
6183 	ssize_t written;
6184 	int size;
6185 	int len;
6186 
6187 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6188 
6189 	if (tracing_disabled)
6190 		return -EINVAL;
6191 
6192 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6193 		return -EINVAL;
6194 
6195 	/* The marker must at least have a tag id */
6196 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6197 		return -EINVAL;
6198 
6199 	if (cnt > TRACE_BUF_SIZE)
6200 		cnt = TRACE_BUF_SIZE;
6201 
6202 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6203 
6204 	local_save_flags(irq_flags);
6205 	size = sizeof(*entry) + cnt;
6206 	if (cnt < FAULT_SIZE_ID)
6207 		size += FAULT_SIZE_ID - cnt;
6208 
6209 	buffer = tr->trace_buffer.buffer;
6210 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6211 					    irq_flags, preempt_count());
6212 	if (!event)
6213 		/* Ring buffer disabled, return as if not open for write */
6214 		return -EBADF;
6215 
6216 	entry = ring_buffer_event_data(event);
6217 
6218 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6219 	if (len) {
6220 		entry->id = -1;
6221 		memcpy(&entry->buf, faulted, FAULTED_SIZE);
6222 		written = -EFAULT;
6223 	} else
6224 		written = cnt;
6225 
6226 	__buffer_unlock_commit(buffer, event);
6227 
6228 	if (written > 0)
6229 		*fpos += written;
6230 
6231 	return written;
6232 }
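
/*
 * Payload layout sketch (illustrative): a raw marker write must begin with
 * an integer tag id followed by arbitrary binary data, e.g.
 *
 *	struct {
 *		unsigned int	id;
 *		char		payload[8];
 *	} raw = { .id = 42, .payload = "rawdata" };
 *
 *	write(fd, &raw, sizeof(raw));
 *
 * Writes smaller than sizeof(unsigned int) or larger than
 * RAW_DATA_MAX_SIZE are rejected with -EINVAL.
 */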
6233 
6234 static int tracing_clock_show(struct seq_file *m, void *v)
6235 {
6236 	struct trace_array *tr = m->private;
6237 	int i;
6238 
6239 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6240 		seq_printf(m,
6241 			"%s%s%s%s", i ? " " : "",
6242 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6243 			i == tr->clock_id ? "]" : "");
6244 	seq_putc(m, '\n');
6245 
6246 	return 0;
6247 }
6248 
6249 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6250 {
6251 	int i;
6252 
6253 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6254 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6255 			break;
6256 	}
6257 	if (i == ARRAY_SIZE(trace_clocks))
6258 		return -EINVAL;
6259 
6260 	mutex_lock(&trace_types_lock);
6261 
6262 	tr->clock_id = i;
6263 
6264 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6265 
6266 	/*
6267 	 * New clock may not be consistent with the previous clock.
6268 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6269 	 */
6270 	tracing_reset_online_cpus(&tr->trace_buffer);
6271 
6272 #ifdef CONFIG_TRACER_MAX_TRACE
6273 	if (tr->max_buffer.buffer)
6274 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6275 	tracing_reset_online_cpus(&tr->max_buffer);
6276 #endif
6277 
6278 	mutex_unlock(&trace_types_lock);
6279 
6280 	return 0;
6281 }
6282 
6283 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6284 				   size_t cnt, loff_t *fpos)
6285 {
6286 	struct seq_file *m = filp->private_data;
6287 	struct trace_array *tr = m->private;
6288 	char buf[64];
6289 	const char *clockstr;
6290 	int ret;
6291 
6292 	if (cnt >= sizeof(buf))
6293 		return -EINVAL;
6294 
6295 	if (copy_from_user(buf, ubuf, cnt))
6296 		return -EFAULT;
6297 
6298 	buf[cnt] = 0;
6299 
6300 	clockstr = strstrip(buf);
6301 
6302 	ret = tracing_set_clock(tr, clockstr);
6303 	if (ret)
6304 		return ret;
6305 
6306 	*fpos += cnt;
6307 
6308 	return cnt;
6309 }
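
/*
 * Usage sketch (illustrative): reading trace_clock lists the available
 * clocks with the current one in brackets, e.g. "[local] global counter ...",
 * and writing one of those names (surrounding whitespace is stripped)
 * switches the clock and resets the buffers so that timestamps remain
 * comparable.
 */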
6310 
6311 static int tracing_clock_open(struct inode *inode, struct file *file)
6312 {
6313 	struct trace_array *tr = inode->i_private;
6314 	int ret;
6315 
6316 	if (tracing_disabled)
6317 		return -ENODEV;
6318 
6319 	if (trace_array_get(tr))
6320 		return -ENODEV;
6321 
6322 	ret = single_open(file, tracing_clock_show, inode->i_private);
6323 	if (ret < 0)
6324 		trace_array_put(tr);
6325 
6326 	return ret;
6327 }
6328 
6329 struct ftrace_buffer_info {
6330 	struct trace_iterator	iter;
6331 	void			*spare;
6332 	unsigned int		spare_cpu;
6333 	unsigned int		read;
6334 };
6335 
6336 #ifdef CONFIG_TRACER_SNAPSHOT
6337 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6338 {
6339 	struct trace_array *tr = inode->i_private;
6340 	struct trace_iterator *iter;
6341 	struct seq_file *m;
6342 	int ret = 0;
6343 
6344 	if (trace_array_get(tr) < 0)
6345 		return -ENODEV;
6346 
6347 	if (file->f_mode & FMODE_READ) {
6348 		iter = __tracing_open(inode, file, true);
6349 		if (IS_ERR(iter))
6350 			ret = PTR_ERR(iter);
6351 	} else {
6352 		/* Writes still need the seq_file to hold the private data */
6353 		ret = -ENOMEM;
6354 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6355 		if (!m)
6356 			goto out;
6357 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6358 		if (!iter) {
6359 			kfree(m);
6360 			goto out;
6361 		}
6362 		ret = 0;
6363 
6364 		iter->tr = tr;
6365 		iter->trace_buffer = &tr->max_buffer;
6366 		iter->cpu_file = tracing_get_cpu(inode);
6367 		m->private = iter;
6368 		file->private_data = m;
6369 	}
6370 out:
6371 	if (ret < 0)
6372 		trace_array_put(tr);
6373 
6374 	return ret;
6375 }
6376 
6377 static ssize_t
6378 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6379 		       loff_t *ppos)
6380 {
6381 	struct seq_file *m = filp->private_data;
6382 	struct trace_iterator *iter = m->private;
6383 	struct trace_array *tr = iter->tr;
6384 	unsigned long val;
6385 	int ret;
6386 
6387 	ret = tracing_update_buffers();
6388 	if (ret < 0)
6389 		return ret;
6390 
6391 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6392 	if (ret)
6393 		return ret;
6394 
6395 	mutex_lock(&trace_types_lock);
6396 
6397 	if (tr->current_trace->use_max_tr) {
6398 		ret = -EBUSY;
6399 		goto out;
6400 	}
6401 
6402 	switch (val) {
6403 	case 0:
6404 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6405 			ret = -EINVAL;
6406 			break;
6407 		}
6408 		if (tr->allocated_snapshot)
6409 			free_snapshot(tr);
6410 		break;
6411 	case 1:
6412 /* Only allow per-cpu swap if the ring buffer supports it */
6413 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6414 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6415 			ret = -EINVAL;
6416 			break;
6417 		}
6418 #endif
6419 		if (tr->allocated_snapshot)
6420 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
6421 					&tr->trace_buffer, iter->cpu_file);
6422 		else
6423 			ret = tracing_alloc_snapshot_instance(tr);
6424 		if (ret < 0)
6425 			break;
6426 		local_irq_disable();
6427 		/* Now, we're going to swap */
6428 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6429 			update_max_tr(tr, current, smp_processor_id());
6430 		else
6431 			update_max_tr_single(tr, current, iter->cpu_file);
6432 		local_irq_enable();
6433 		break;
6434 	default:
6435 		if (tr->allocated_snapshot) {
6436 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6437 				tracing_reset_online_cpus(&tr->max_buffer);
6438 			else
6439 				tracing_reset(&tr->max_buffer, iter->cpu_file);
6440 		}
6441 		break;
6442 	}
6443 
6444 	if (ret >= 0) {
6445 		*ppos += cnt;
6446 		ret = cnt;
6447 	}
6448 out:
6449 	mutex_unlock(&trace_types_lock);
6450 	return ret;
6451 }
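
/*
 * Value semantics, spelled out (illustrative), for writes to the snapshot
 * file:
 *
 *	0     - free the snapshot buffer (all-CPU file only)
 *	1     - allocate the snapshot buffer if needed and swap it with the
 *	        live buffer (per-CPU swap only with CONFIG_RING_BUFFER_ALLOW_SWAP)
 *	other - clear the snapshot buffer contents if one is allocated
 */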
6452 
6453 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6454 {
6455 	struct seq_file *m = file->private_data;
6456 	int ret;
6457 
6458 	ret = tracing_release(inode, file);
6459 
6460 	if (file->f_mode & FMODE_READ)
6461 		return ret;
6462 
6463 	/* If write only, the seq_file is just a stub */
6464 	if (m)
6465 		kfree(m->private);
6466 	kfree(m);
6467 
6468 	return 0;
6469 }
6470 
6471 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6472 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6473 				    size_t count, loff_t *ppos);
6474 static int tracing_buffers_release(struct inode *inode, struct file *file);
6475 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6476 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6477 
6478 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6479 {
6480 	struct ftrace_buffer_info *info;
6481 	int ret;
6482 
6483 	ret = tracing_buffers_open(inode, filp);
6484 	if (ret < 0)
6485 		return ret;
6486 
6487 	info = filp->private_data;
6488 
6489 	if (info->iter.trace->use_max_tr) {
6490 		tracing_buffers_release(inode, filp);
6491 		return -EBUSY;
6492 	}
6493 
6494 	info->iter.snapshot = true;
6495 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
6496 
6497 	return ret;
6498 }
6499 
6500 #endif /* CONFIG_TRACER_SNAPSHOT */
6501 
6502 
6503 static const struct file_operations tracing_thresh_fops = {
6504 	.open		= tracing_open_generic,
6505 	.read		= tracing_thresh_read,
6506 	.write		= tracing_thresh_write,
6507 	.llseek		= generic_file_llseek,
6508 };
6509 
6510 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6511 static const struct file_operations tracing_max_lat_fops = {
6512 	.open		= tracing_open_generic,
6513 	.read		= tracing_max_lat_read,
6514 	.write		= tracing_max_lat_write,
6515 	.llseek		= generic_file_llseek,
6516 };
6517 #endif
6518 
6519 static const struct file_operations set_tracer_fops = {
6520 	.open		= tracing_open_generic,
6521 	.read		= tracing_set_trace_read,
6522 	.write		= tracing_set_trace_write,
6523 	.llseek		= generic_file_llseek,
6524 };
6525 
6526 static const struct file_operations tracing_pipe_fops = {
6527 	.open		= tracing_open_pipe,
6528 	.poll		= tracing_poll_pipe,
6529 	.read		= tracing_read_pipe,
6530 	.splice_read	= tracing_splice_read_pipe,
6531 	.release	= tracing_release_pipe,
6532 	.llseek		= no_llseek,
6533 };
6534 
6535 static const struct file_operations tracing_entries_fops = {
6536 	.open		= tracing_open_generic_tr,
6537 	.read		= tracing_entries_read,
6538 	.write		= tracing_entries_write,
6539 	.llseek		= generic_file_llseek,
6540 	.release	= tracing_release_generic_tr,
6541 };
6542 
6543 static const struct file_operations tracing_total_entries_fops = {
6544 	.open		= tracing_open_generic_tr,
6545 	.read		= tracing_total_entries_read,
6546 	.llseek		= generic_file_llseek,
6547 	.release	= tracing_release_generic_tr,
6548 };
6549 
6550 static const struct file_operations tracing_free_buffer_fops = {
6551 	.open		= tracing_open_generic_tr,
6552 	.write		= tracing_free_buffer_write,
6553 	.release	= tracing_free_buffer_release,
6554 };
6555 
6556 static const struct file_operations tracing_mark_fops = {
6557 	.open		= tracing_open_generic_tr,
6558 	.write		= tracing_mark_write,
6559 	.llseek		= generic_file_llseek,
6560 	.release	= tracing_release_generic_tr,
6561 };
6562 
6563 static const struct file_operations tracing_mark_raw_fops = {
6564 	.open		= tracing_open_generic_tr,
6565 	.write		= tracing_mark_raw_write,
6566 	.llseek		= generic_file_llseek,
6567 	.release	= tracing_release_generic_tr,
6568 };
6569 
6570 static const struct file_operations trace_clock_fops = {
6571 	.open		= tracing_clock_open,
6572 	.read		= seq_read,
6573 	.llseek		= seq_lseek,
6574 	.release	= tracing_single_release_tr,
6575 	.write		= tracing_clock_write,
6576 };
6577 
6578 #ifdef CONFIG_TRACER_SNAPSHOT
6579 static const struct file_operations snapshot_fops = {
6580 	.open		= tracing_snapshot_open,
6581 	.read		= seq_read,
6582 	.write		= tracing_snapshot_write,
6583 	.llseek		= tracing_lseek,
6584 	.release	= tracing_snapshot_release,
6585 };
6586 
6587 static const struct file_operations snapshot_raw_fops = {
6588 	.open		= snapshot_raw_open,
6589 	.read		= tracing_buffers_read,
6590 	.release	= tracing_buffers_release,
6591 	.splice_read	= tracing_buffers_splice_read,
6592 	.llseek		= no_llseek,
6593 };
6594 
6595 #endif /* CONFIG_TRACER_SNAPSHOT */
6596 
6597 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6598 {
6599 	struct trace_array *tr = inode->i_private;
6600 	struct ftrace_buffer_info *info;
6601 	int ret;
6602 
6603 	if (tracing_disabled)
6604 		return -ENODEV;
6605 
6606 	if (trace_array_get(tr) < 0)
6607 		return -ENODEV;
6608 
6609 	info = kzalloc(sizeof(*info), GFP_KERNEL);
6610 	if (!info) {
6611 		trace_array_put(tr);
6612 		return -ENOMEM;
6613 	}
6614 
6615 	mutex_lock(&trace_types_lock);
6616 
6617 	info->iter.tr		= tr;
6618 	info->iter.cpu_file	= tracing_get_cpu(inode);
6619 	info->iter.trace	= tr->current_trace;
6620 	info->iter.trace_buffer = &tr->trace_buffer;
6621 	info->spare		= NULL;
6622 	/* Force reading ring buffer for first read */
6623 	info->read		= (unsigned int)-1;
6624 
6625 	filp->private_data = info;
6626 
6627 	tr->current_trace->ref++;
6628 
6629 	mutex_unlock(&trace_types_lock);
6630 
6631 	ret = nonseekable_open(inode, filp);
6632 	if (ret < 0)
6633 		trace_array_put(tr);
6634 
6635 	return ret;
6636 }
6637 
6638 static unsigned int
6639 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6640 {
6641 	struct ftrace_buffer_info *info = filp->private_data;
6642 	struct trace_iterator *iter = &info->iter;
6643 
6644 	return trace_poll(iter, filp, poll_table);
6645 }
6646 
6647 static ssize_t
6648 tracing_buffers_read(struct file *filp, char __user *ubuf,
6649 		     size_t count, loff_t *ppos)
6650 {
6651 	struct ftrace_buffer_info *info = filp->private_data;
6652 	struct trace_iterator *iter = &info->iter;
6653 	ssize_t ret = 0;
6654 	ssize_t size;
6655 
6656 	if (!count)
6657 		return 0;
6658 
6659 #ifdef CONFIG_TRACER_MAX_TRACE
6660 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6661 		return -EBUSY;
6662 #endif
6663 
6664 	if (!info->spare) {
6665 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6666 							  iter->cpu_file);
6667 		if (IS_ERR(info->spare)) {
6668 			ret = PTR_ERR(info->spare);
6669 			info->spare = NULL;
6670 		} else {
6671 			info->spare_cpu = iter->cpu_file;
6672 		}
6673 	}
6674 	if (!info->spare)
6675 		return ret;
6676 
6677 	/* Do we have previous read data to read? */
6678 	if (info->read < PAGE_SIZE)
6679 		goto read;
6680 
6681  again:
6682 	trace_access_lock(iter->cpu_file);
6683 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6684 				    &info->spare,
6685 				    count,
6686 				    iter->cpu_file, 0);
6687 	trace_access_unlock(iter->cpu_file);
6688 
6689 	if (ret < 0) {
6690 		if (trace_empty(iter)) {
6691 			if ((filp->f_flags & O_NONBLOCK))
6692 				return -EAGAIN;
6693 
6694 			ret = wait_on_pipe(iter, false);
6695 			if (ret)
6696 				return ret;
6697 
6698 			goto again;
6699 		}
6700 		return 0;
6701 	}
6702 
6703 	info->read = 0;
6704  read:
6705 	size = PAGE_SIZE - info->read;
6706 	if (size > count)
6707 		size = count;
6708 
6709 	ret = copy_to_user(ubuf, info->spare + info->read, size);
6710 	if (ret == size)
6711 		return -EFAULT;
6712 
6713 	size -= ret;
6714 
6715 	*ppos += size;
6716 	info->read += size;
6717 
6718 	return size;
6719 }
6720 
6721 static int tracing_buffers_release(struct inode *inode, struct file *file)
6722 {
6723 	struct ftrace_buffer_info *info = file->private_data;
6724 	struct trace_iterator *iter = &info->iter;
6725 
6726 	mutex_lock(&trace_types_lock);
6727 
6728 	iter->tr->current_trace->ref--;
6729 
6730 	__trace_array_put(iter->tr);
6731 
6732 	if (info->spare)
6733 		ring_buffer_free_read_page(iter->trace_buffer->buffer,
6734 					   info->spare_cpu, info->spare);
6735 	kfree(info);
6736 
6737 	mutex_unlock(&trace_types_lock);
6738 
6739 	return 0;
6740 }
6741 
6742 struct buffer_ref {
6743 	struct ring_buffer	*buffer;
6744 	void			*page;
6745 	int			cpu;
6746 	refcount_t		refcount;
6747 };
6748 
6749 static void buffer_ref_release(struct buffer_ref *ref)
6750 {
6751 	if (!refcount_dec_and_test(&ref->refcount))
6752 		return;
6753 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6754 	kfree(ref);
6755 }
6756 
6757 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6758 				    struct pipe_buffer *buf)
6759 {
6760 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6761 
6762 	buffer_ref_release(ref);
6763 	buf->private = 0;
6764 }
6765 
6766 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6767 				struct pipe_buffer *buf)
6768 {
6769 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6770 
6771 	if (refcount_read(&ref->refcount) > INT_MAX/2)
6772 		return false;
6773 
6774 	refcount_inc(&ref->refcount);
6775 	return true;
6776 }
6777 
6778 /* Pipe buffer operations for a buffer. */
6779 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6780 	.can_merge		= 0,
6781 	.confirm		= generic_pipe_buf_confirm,
6782 	.release		= buffer_pipe_buf_release,
6783 	.steal			= generic_pipe_buf_nosteal,
6784 	.get			= buffer_pipe_buf_get,
6785 };
6786 
6787 /*
6788  * Callback from splice_to_pipe(), if we need to release some pages
6789  * at the end of the spd in case we errored out while filling the pipe.
6790  */
6791 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6792 {
6793 	struct buffer_ref *ref =
6794 		(struct buffer_ref *)spd->partial[i].private;
6795 
6796 	buffer_ref_release(ref);
6797 	spd->partial[i].private = 0;
6798 }
6799 
6800 static ssize_t
6801 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6802 			    struct pipe_inode_info *pipe, size_t len,
6803 			    unsigned int flags)
6804 {
6805 	struct ftrace_buffer_info *info = file->private_data;
6806 	struct trace_iterator *iter = &info->iter;
6807 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6808 	struct page *pages_def[PIPE_DEF_BUFFERS];
6809 	struct splice_pipe_desc spd = {
6810 		.pages		= pages_def,
6811 		.partial	= partial_def,
6812 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6813 		.ops		= &buffer_pipe_buf_ops,
6814 		.spd_release	= buffer_spd_release,
6815 	};
6816 	struct buffer_ref *ref;
6817 	int entries, i;
6818 	ssize_t ret = 0;
6819 
6820 #ifdef CONFIG_TRACER_MAX_TRACE
6821 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6822 		return -EBUSY;
6823 #endif
6824 
6825 	if (*ppos & (PAGE_SIZE - 1))
6826 		return -EINVAL;
6827 
6828 	if (len & (PAGE_SIZE - 1)) {
6829 		if (len < PAGE_SIZE)
6830 			return -EINVAL;
6831 		len &= PAGE_MASK;
6832 	}
6833 
6834 	if (splice_grow_spd(pipe, &spd))
6835 		return -ENOMEM;
6836 
6837  again:
6838 	trace_access_lock(iter->cpu_file);
6839 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6840 
6841 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6842 		struct page *page;
6843 		int r;
6844 
6845 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6846 		if (!ref) {
6847 			ret = -ENOMEM;
6848 			break;
6849 		}
6850 
6851 		refcount_set(&ref->refcount, 1);
6852 		ref->buffer = iter->trace_buffer->buffer;
6853 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6854 		if (IS_ERR(ref->page)) {
6855 			ret = PTR_ERR(ref->page);
6856 			ref->page = NULL;
6857 			kfree(ref);
6858 			break;
6859 		}
6860 		ref->cpu = iter->cpu_file;
6861 
6862 		r = ring_buffer_read_page(ref->buffer, &ref->page,
6863 					  len, iter->cpu_file, 1);
6864 		if (r < 0) {
6865 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
6866 						   ref->page);
6867 			kfree(ref);
6868 			break;
6869 		}
6870 
6871 		page = virt_to_page(ref->page);
6872 
6873 		spd.pages[i] = page;
6874 		spd.partial[i].len = PAGE_SIZE;
6875 		spd.partial[i].offset = 0;
6876 		spd.partial[i].private = (unsigned long)ref;
6877 		spd.nr_pages++;
6878 		*ppos += PAGE_SIZE;
6879 
6880 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6881 	}
6882 
6883 	trace_access_unlock(iter->cpu_file);
6884 	spd.nr_pages = i;
6885 
6886 	/* did we read anything? */
6887 	if (!spd.nr_pages) {
6888 		if (ret)
6889 			goto out;
6890 
6891 		ret = -EAGAIN;
6892 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6893 			goto out;
6894 
6895 		ret = wait_on_pipe(iter, true);
6896 		if (ret)
6897 			goto out;
6898 
6899 		goto again;
6900 	}
6901 
6902 	ret = splice_to_pipe(pipe, &spd);
6903 out:
6904 	splice_shrink_spd(&spd);
6905 
6906 	return ret;
6907 }
6908 
6909 static const struct file_operations tracing_buffers_fops = {
6910 	.open		= tracing_buffers_open,
6911 	.read		= tracing_buffers_read,
6912 	.poll		= tracing_buffers_poll,
6913 	.release	= tracing_buffers_release,
6914 	.splice_read	= tracing_buffers_splice_read,
6915 	.llseek		= no_llseek,
6916 };
6917 
6918 static ssize_t
6919 tracing_stats_read(struct file *filp, char __user *ubuf,
6920 		   size_t count, loff_t *ppos)
6921 {
6922 	struct inode *inode = file_inode(filp);
6923 	struct trace_array *tr = inode->i_private;
6924 	struct trace_buffer *trace_buf = &tr->trace_buffer;
6925 	int cpu = tracing_get_cpu(inode);
6926 	struct trace_seq *s;
6927 	unsigned long cnt;
6928 	unsigned long long t;
6929 	unsigned long usec_rem;
6930 
6931 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6932 	if (!s)
6933 		return -ENOMEM;
6934 
6935 	trace_seq_init(s);
6936 
6937 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6938 	trace_seq_printf(s, "entries: %ld\n", cnt);
6939 
6940 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6941 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6942 
6943 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6944 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6945 
6946 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6947 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6948 
6949 	if (trace_clocks[tr->clock_id].in_ns) {
6950 		/* local or global for trace_clock */
6951 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6952 		usec_rem = do_div(t, USEC_PER_SEC);
6953 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6954 								t, usec_rem);
6955 
6956 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6957 		usec_rem = do_div(t, USEC_PER_SEC);
6958 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6959 	} else {
6960 		/* counter or tsc mode for trace_clock */
6961 		trace_seq_printf(s, "oldest event ts: %llu\n",
6962 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6963 
6964 		trace_seq_printf(s, "now ts: %llu\n",
6965 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
6966 	}
6967 
6968 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6969 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
6970 
6971 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6972 	trace_seq_printf(s, "read events: %ld\n", cnt);
6973 
6974 	count = simple_read_from_buffer(ubuf, count, ppos,
6975 					s->buffer, trace_seq_used(s));
6976 
6977 	kfree(s);
6978 
6979 	return count;
6980 }
6981 
6982 static const struct file_operations tracing_stats_fops = {
6983 	.open		= tracing_open_generic_tr,
6984 	.read		= tracing_stats_read,
6985 	.llseek		= generic_file_llseek,
6986 	.release	= tracing_release_generic_tr,
6987 };
6988 
6989 #ifdef CONFIG_DYNAMIC_FTRACE
6990 
6991 static ssize_t
6992 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6993 		  size_t cnt, loff_t *ppos)
6994 {
6995 	unsigned long *p = filp->private_data;
6996 	char buf[64]; /* Not too big for a shallow stack */
6997 	int r;
6998 
6999 	r = scnprintf(buf, 63, "%ld", *p);
7000 	buf[r++] = '\n';
7001 
7002 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7003 }
7004 
7005 static const struct file_operations tracing_dyn_info_fops = {
7006 	.open		= tracing_open_generic,
7007 	.read		= tracing_read_dyn_info,
7008 	.llseek		= generic_file_llseek,
7009 };
7010 #endif /* CONFIG_DYNAMIC_FTRACE */
7011 
7012 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7013 static void
7014 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7015 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7016 		void *data)
7017 {
7018 	tracing_snapshot_instance(tr);
7019 }
7020 
7021 static void
7022 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7023 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7024 		      void *data)
7025 {
7026 	struct ftrace_func_mapper *mapper = data;
7027 	long *count = NULL;
7028 
7029 	if (mapper)
7030 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7031 
7032 	if (count) {
7033 
7034 		if (*count <= 0)
7035 			return;
7036 
7037 		(*count)--;
7038 	}
7039 
7040 	tracing_snapshot_instance(tr);
7041 }
7042 
7043 static int
7044 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7045 		      struct ftrace_probe_ops *ops, void *data)
7046 {
7047 	struct ftrace_func_mapper *mapper = data;
7048 	long *count = NULL;
7049 
7050 	seq_printf(m, "%ps:", (void *)ip);
7051 
7052 	seq_puts(m, "snapshot");
7053 
7054 	if (mapper)
7055 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7056 
7057 	if (count)
7058 		seq_printf(m, ":count=%ld\n", *count);
7059 	else
7060 		seq_puts(m, ":unlimited\n");
7061 
7062 	return 0;
7063 }
7064 
7065 static int
7066 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7067 		     unsigned long ip, void *init_data, void **data)
7068 {
7069 	struct ftrace_func_mapper *mapper = *data;
7070 
7071 	if (!mapper) {
7072 		mapper = allocate_ftrace_func_mapper();
7073 		if (!mapper)
7074 			return -ENOMEM;
7075 		*data = mapper;
7076 	}
7077 
7078 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7079 }
7080 
7081 static void
7082 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7083 		     unsigned long ip, void *data)
7084 {
7085 	struct ftrace_func_mapper *mapper = data;
7086 
7087 	if (!ip) {
7088 		if (!mapper)
7089 			return;
7090 		free_ftrace_func_mapper(mapper, NULL);
7091 		return;
7092 	}
7093 
7094 	ftrace_func_mapper_remove_ip(mapper, ip);
7095 }
7096 
7097 static struct ftrace_probe_ops snapshot_probe_ops = {
7098 	.func			= ftrace_snapshot,
7099 	.print			= ftrace_snapshot_print,
7100 };
7101 
7102 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7103 	.func			= ftrace_count_snapshot,
7104 	.print			= ftrace_snapshot_print,
7105 	.init			= ftrace_snapshot_init,
7106 	.free			= ftrace_snapshot_free,
7107 };
7108 
7109 static int
7110 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7111 			       char *glob, char *cmd, char *param, int enable)
7112 {
7113 	struct ftrace_probe_ops *ops;
7114 	void *count = (void *)-1;
7115 	char *number;
7116 	int ret;
7117 
7118 	if (!tr)
7119 		return -ENODEV;
7120 
7121 	/* hash funcs only work with set_ftrace_filter */
7122 	if (!enable)
7123 		return -EINVAL;
7124 
7125 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
7126 
7127 	if (glob[0] == '!')
7128 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7129 
7130 	if (!param)
7131 		goto out_reg;
7132 
7133 	number = strsep(&param, ":");
7134 
7135 	if (!strlen(number))
7136 		goto out_reg;
7137 
7138 	/*
7139 	 * We use the callback data field (which is a pointer)
7140 	 * as our counter.
7141 	 */
7142 	ret = kstrtoul(number, 0, (unsigned long *)&count);
7143 	if (ret)
7144 		return ret;
7145 
7146  out_reg:
7147 	ret = tracing_alloc_snapshot_instance(tr);
7148 	if (ret < 0)
7149 		goto out;
7150 
7151 	ret = register_ftrace_function_probe(glob, tr, ops, count);
7152 
7153  out:
7154 	return ret < 0 ? ret : 0;
7155 }
7156 
7157 static struct ftrace_func_command ftrace_snapshot_cmd = {
7158 	.name			= "snapshot",
7159 	.func			= ftrace_trace_snapshot_callback,
7160 };
7161 
7162 static __init int register_snapshot_cmd(void)
7163 {
7164 	return register_ftrace_command(&ftrace_snapshot_cmd);
7165 }
7166 #else
7167 static inline __init int register_snapshot_cmd(void) { return 0; }
7168 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7169 
7170 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7171 {
7172 	if (WARN_ON(!tr->dir))
7173 		return ERR_PTR(-ENODEV);
7174 
7175 	/* Top directory uses NULL as the parent */
7176 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7177 		return NULL;
7178 
7179 	/* All sub buffers have a descriptor */
7180 	return tr->dir;
7181 }
7182 
7183 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7184 {
7185 	struct dentry *d_tracer;
7186 
7187 	if (tr->percpu_dir)
7188 		return tr->percpu_dir;
7189 
7190 	d_tracer = tracing_get_dentry(tr);
7191 	if (IS_ERR(d_tracer))
7192 		return NULL;
7193 
7194 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7195 
7196 	WARN_ONCE(!tr->percpu_dir,
7197 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7198 
7199 	return tr->percpu_dir;
7200 }
7201 
7202 static struct dentry *
7203 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7204 		      void *data, long cpu, const struct file_operations *fops)
7205 {
7206 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7207 
7208 	if (ret) /* See tracing_get_cpu() */
7209 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
7210 	return ret;
7211 }
7212 
7213 static void
7214 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7215 {
7216 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7217 	struct dentry *d_cpu;
7218 	char cpu_dir[30]; /* 30 characters should be more than enough */
7219 
7220 	if (!d_percpu)
7221 		return;
7222 
7223 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
7224 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7225 	if (!d_cpu) {
7226 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7227 		return;
7228 	}
7229 
7230 	/* per cpu trace_pipe */
7231 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7232 				tr, cpu, &tracing_pipe_fops);
7233 
7234 	/* per cpu trace */
7235 	trace_create_cpu_file("trace", 0644, d_cpu,
7236 				tr, cpu, &tracing_fops);
7237 
7238 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7239 				tr, cpu, &tracing_buffers_fops);
7240 
7241 	trace_create_cpu_file("stats", 0444, d_cpu,
7242 				tr, cpu, &tracing_stats_fops);
7243 
7244 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7245 				tr, cpu, &tracing_entries_fops);
7246 
7247 #ifdef CONFIG_TRACER_SNAPSHOT
7248 	trace_create_cpu_file("snapshot", 0644, d_cpu,
7249 				tr, cpu, &snapshot_fops);
7250 
7251 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7252 				tr, cpu, &snapshot_raw_fops);
7253 #endif
7254 }
7255 
7256 #ifdef CONFIG_FTRACE_SELFTEST
7257 /* Let selftest have access to static functions in this file */
7258 #include "trace_selftest.c"
7259 #endif
7260 
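/*
 * trace_options_read()/trace_options_write() back the per-tracer option
 * files created under options/ by create_trace_option_file().  The
 * *_core_* variants further below handle the top-level trace option
 * flags (tr->trace_flags) instead.
 */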
7261 static ssize_t
7262 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7263 			loff_t *ppos)
7264 {
7265 	struct trace_option_dentry *topt = filp->private_data;
7266 	char *buf;
7267 
7268 	if (topt->flags->val & topt->opt->bit)
7269 		buf = "1\n";
7270 	else
7271 		buf = "0\n";
7272 
7273 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7274 }
7275 
7276 static ssize_t
7277 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7278 			 loff_t *ppos)
7279 {
7280 	struct trace_option_dentry *topt = filp->private_data;
7281 	unsigned long val;
7282 	int ret;
7283 
7284 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7285 	if (ret)
7286 		return ret;
7287 
7288 	if (val != 0 && val != 1)
7289 		return -EINVAL;
7290 
7291 	if (!!(topt->flags->val & topt->opt->bit) != val) {
7292 		mutex_lock(&trace_types_lock);
7293 		ret = __set_tracer_option(topt->tr, topt->flags,
7294 					  topt->opt, !val);
7295 		mutex_unlock(&trace_types_lock);
7296 		if (ret)
7297 			return ret;
7298 	}
7299 
7300 	*ppos += cnt;
7301 
7302 	return cnt;
7303 }
7304 
7305 
7306 static const struct file_operations trace_options_fops = {
7307 	.open = tracing_open_generic,
7308 	.read = trace_options_read,
7309 	.write = trace_options_write,
7310 	.llseek	= generic_file_llseek,
7311 };
7312 
7313 /*
7314  * In order to pass in both the trace_array descriptor as well as the index
7315  * to the flag that the trace option file represents, the trace_array
7316  * has a character array of trace_flags_index[], which holds the index
7317  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7318  * The address of this character array is passed to the flag option file
7319  * read/write callbacks.
7320  *
7321  * In order to extract both the index and the trace_array descriptor,
7322  * get_tr_index() uses the following algorithm.
7323  *
7324  *   idx = *ptr;
7325  *
7326  * This works because the pointer holds the address of one entry in the
7327  * index array, and each entry's value equals its own index (index[1] == 1).
7328  *
7329  * Then, to get the trace_array descriptor, that index is subtracted from
7330  * the pointer, which lands back at the start of the index array:
7331  *
7332  *   ptr - idx == &index[0]
7333  *
7334  * Then a simple container_of() from that pointer gets us to the
7335  * trace_array descriptor.
7336  */
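/*
 * For example, if the file for bit 5 was opened, its private data points
 * at &tr->trace_flags_index[5], which stores the value 5.  Dereferencing
 * gives idx == 5, and (data - 5) == tr->trace_flags_index, from which
 * container_of() recovers the enclosing trace_array.
 */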
7337 static void get_tr_index(void *data, struct trace_array **ptr,
7338 			 unsigned int *pindex)
7339 {
7340 	*pindex = *(unsigned char *)data;
7341 
7342 	*ptr = container_of(data - *pindex, struct trace_array,
7343 			    trace_flags_index);
7344 }
7345 
7346 static ssize_t
7347 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7348 			loff_t *ppos)
7349 {
7350 	void *tr_index = filp->private_data;
7351 	struct trace_array *tr;
7352 	unsigned int index;
7353 	char *buf;
7354 
7355 	get_tr_index(tr_index, &tr, &index);
7356 
7357 	if (tr->trace_flags & (1 << index))
7358 		buf = "1\n";
7359 	else
7360 		buf = "0\n";
7361 
7362 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7363 }
7364 
7365 static ssize_t
7366 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7367 			 loff_t *ppos)
7368 {
7369 	void *tr_index = filp->private_data;
7370 	struct trace_array *tr;
7371 	unsigned int index;
7372 	unsigned long val;
7373 	int ret;
7374 
7375 	get_tr_index(tr_index, &tr, &index);
7376 
7377 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7378 	if (ret)
7379 		return ret;
7380 
7381 	if (val != 0 && val != 1)
7382 		return -EINVAL;
7383 
7384 	mutex_lock(&event_mutex);
7385 	mutex_lock(&trace_types_lock);
7386 	ret = set_tracer_flag(tr, 1 << index, val);
7387 	mutex_unlock(&trace_types_lock);
7388 	mutex_unlock(&event_mutex);
7389 
7390 	if (ret < 0)
7391 		return ret;
7392 
7393 	*ppos += cnt;
7394 
7395 	return cnt;
7396 }
7397 
7398 static const struct file_operations trace_options_core_fops = {
7399 	.open = tracing_open_generic,
7400 	.read = trace_options_core_read,
7401 	.write = trace_options_core_write,
7402 	.llseek = generic_file_llseek,
7403 };
7404 
7405 struct dentry *trace_create_file(const char *name,
7406 				 umode_t mode,
7407 				 struct dentry *parent,
7408 				 void *data,
7409 				 const struct file_operations *fops)
7410 {
7411 	struct dentry *ret;
7412 
7413 	ret = tracefs_create_file(name, mode, parent, data, fops);
7414 	if (!ret)
7415 		pr_warn("Could not create tracefs '%s' entry\n", name);
7416 
7417 	return ret;
7418 }
7419 
7420 
7421 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7422 {
7423 	struct dentry *d_tracer;
7424 
7425 	if (tr->options)
7426 		return tr->options;
7427 
7428 	d_tracer = tracing_get_dentry(tr);
7429 	if (IS_ERR(d_tracer))
7430 		return NULL;
7431 
7432 	tr->options = tracefs_create_dir("options", d_tracer);
7433 	if (!tr->options) {
7434 		pr_warn("Could not create tracefs directory 'options'\n");
7435 		return NULL;
7436 	}
7437 
7438 	return tr->options;
7439 }
7440 
7441 static void
7442 create_trace_option_file(struct trace_array *tr,
7443 			 struct trace_option_dentry *topt,
7444 			 struct tracer_flags *flags,
7445 			 struct tracer_opt *opt)
7446 {
7447 	struct dentry *t_options;
7448 
7449 	t_options = trace_options_init_dentry(tr);
7450 	if (!t_options)
7451 		return;
7452 
7453 	topt->flags = flags;
7454 	topt->opt = opt;
7455 	topt->tr = tr;
7456 
7457 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7458 				    &trace_options_fops);
7459 
7460 }
7461 
7462 static void
7463 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7464 {
7465 	struct trace_option_dentry *topts;
7466 	struct trace_options *tr_topts;
7467 	struct tracer_flags *flags;
7468 	struct tracer_opt *opts;
7469 	int cnt;
7470 	int i;
7471 
7472 	if (!tracer)
7473 		return;
7474 
7475 	flags = tracer->flags;
7476 
7477 	if (!flags || !flags->opts)
7478 		return;
7479 
7480 	/*
7481 	 * If this is an instance, only create flags for tracers
7482 	 * the instance may have.
7483 	 */
7484 	if (!trace_ok_for_array(tracer, tr))
7485 		return;
7486 
7487 	for (i = 0; i < tr->nr_topts; i++) {
7488 		/* Make sure there are no duplicate flags. */
7489 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7490 			return;
7491 	}
7492 
7493 	opts = flags->opts;
7494 
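	/* Count the options; the array is terminated by an entry with a NULL name. */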
7495 	for (cnt = 0; opts[cnt].name; cnt++)
7496 		;
7497 
7498 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7499 	if (!topts)
7500 		return;
7501 
7502 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7503 			    GFP_KERNEL);
7504 	if (!tr_topts) {
7505 		kfree(topts);
7506 		return;
7507 	}
7508 
7509 	tr->topts = tr_topts;
7510 	tr->topts[tr->nr_topts].tracer = tracer;
7511 	tr->topts[tr->nr_topts].topts = topts;
7512 	tr->nr_topts++;
7513 
7514 	for (cnt = 0; opts[cnt].name; cnt++) {
7515 		create_trace_option_file(tr, &topts[cnt], flags,
7516 					 &opts[cnt]);
7517 		WARN_ONCE(topts[cnt].entry == NULL,
7518 			  "Failed to create trace option: %s",
7519 			  opts[cnt].name);
7520 	}
7521 }
7522 
7523 static struct dentry *
7524 create_trace_option_core_file(struct trace_array *tr,
7525 			      const char *option, long index)
7526 {
7527 	struct dentry *t_options;
7528 
7529 	t_options = trace_options_init_dentry(tr);
7530 	if (!t_options)
7531 		return NULL;
7532 
7533 	return trace_create_file(option, 0644, t_options,
7534 				 (void *)&tr->trace_flags_index[index],
7535 				 &trace_options_core_fops);
7536 }
7537 
7538 static void create_trace_options_dir(struct trace_array *tr)
7539 {
7540 	struct dentry *t_options;
7541 	bool top_level = tr == &global_trace;
7542 	int i;
7543 
7544 	t_options = trace_options_init_dentry(tr);
7545 	if (!t_options)
7546 		return;
7547 
7548 	for (i = 0; trace_options[i]; i++) {
7549 		if (top_level ||
7550 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7551 			create_trace_option_core_file(tr, trace_options[i], i);
7552 	}
7553 }
7554 
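/*
 * rb_simple_read()/rb_simple_write() back the "tracing_on" file of a
 * trace instance: reading reports whether the ring buffer is currently
 * recording, and writing 0/1 stops or restarts it (also calling the
 * current tracer's stop/start callbacks when present).
 */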
7555 static ssize_t
7556 rb_simple_read(struct file *filp, char __user *ubuf,
7557 	       size_t cnt, loff_t *ppos)
7558 {
7559 	struct trace_array *tr = filp->private_data;
7560 	char buf[64];
7561 	int r;
7562 
7563 	r = tracer_tracing_is_on(tr);
7564 	r = sprintf(buf, "%d\n", r);
7565 
7566 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7567 }
7568 
7569 static ssize_t
7570 rb_simple_write(struct file *filp, const char __user *ubuf,
7571 		size_t cnt, loff_t *ppos)
7572 {
7573 	struct trace_array *tr = filp->private_data;
7574 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
7575 	unsigned long val;
7576 	int ret;
7577 
7578 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7579 	if (ret)
7580 		return ret;
7581 
7582 	if (buffer) {
7583 		mutex_lock(&trace_types_lock);
7584 		if (!!val == tracer_tracing_is_on(tr)) {
7585 			val = 0; /* do nothing */
7586 		} else if (val) {
7587 			tracer_tracing_on(tr);
7588 			if (tr->current_trace->start)
7589 				tr->current_trace->start(tr);
7590 		} else {
7591 			tracer_tracing_off(tr);
7592 			if (tr->current_trace->stop)
7593 				tr->current_trace->stop(tr);
7594 		}
7595 		mutex_unlock(&trace_types_lock);
7596 	}
7597 
7598 	(*ppos)++;
7599 
7600 	return cnt;
7601 }
7602 
7603 static const struct file_operations rb_simple_fops = {
7604 	.open		= tracing_open_generic_tr,
7605 	.read		= rb_simple_read,
7606 	.write		= rb_simple_write,
7607 	.release	= tracing_release_generic_tr,
7608 	.llseek		= default_llseek,
7609 };
7610 
7611 struct dentry *trace_instance_dir;
7612 
7613 static void
7614 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7615 
7616 static int
7617 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7618 {
7619 	enum ring_buffer_flags rb_flags;
7620 
7621 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7622 
7623 	buf->tr = tr;
7624 
7625 	buf->buffer = ring_buffer_alloc(size, rb_flags);
7626 	if (!buf->buffer)
7627 		return -ENOMEM;
7628 
7629 	buf->data = alloc_percpu(struct trace_array_cpu);
7630 	if (!buf->data) {
7631 		ring_buffer_free(buf->buffer);
7632 		buf->buffer = NULL;
7633 		return -ENOMEM;
7634 	}
7635 
7636 	/* Allocate the first page for all buffers */
7637 	set_buffer_entries(&tr->trace_buffer,
7638 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
7639 
7640 	return 0;
7641 }
7642 
7643 static int allocate_trace_buffers(struct trace_array *tr, int size)
7644 {
7645 	int ret;
7646 
7647 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7648 	if (ret)
7649 		return ret;
7650 
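	/*
	 * The max_buffer is the buffer that traces are swapped into when a
	 * snapshot (or a new max latency) is taken.  Unless a snapshot was
	 * requested on the kernel command line, allocate it at a minimal
	 * size here; it is expected to be resized to match the main buffer
	 * when a snapshot is actually allocated.
	 */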
7651 #ifdef CONFIG_TRACER_MAX_TRACE
7652 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
7653 				    allocate_snapshot ? size : 1);
7654 	if (WARN_ON(ret)) {
7655 		ring_buffer_free(tr->trace_buffer.buffer);
7656 		tr->trace_buffer.buffer = NULL;
7657 		free_percpu(tr->trace_buffer.data);
7658 		tr->trace_buffer.data = NULL;
7659 		return -ENOMEM;
7660 	}
7661 	tr->allocated_snapshot = allocate_snapshot;
7662 
7663 	/*
7664 	 * Only the top level trace array gets its snapshot allocated
7665 	 * from the kernel command line.
7666 	 */
7667 	allocate_snapshot = false;
7668 #endif
7669 	return 0;
7670 }
7671 
7672 static void free_trace_buffer(struct trace_buffer *buf)
7673 {
7674 	if (buf->buffer) {
7675 		ring_buffer_free(buf->buffer);
7676 		buf->buffer = NULL;
7677 		free_percpu(buf->data);
7678 		buf->data = NULL;
7679 	}
7680 }
7681 
7682 static void free_trace_buffers(struct trace_array *tr)
7683 {
7684 	if (!tr)
7685 		return;
7686 
7687 	free_trace_buffer(&tr->trace_buffer);
7688 
7689 #ifdef CONFIG_TRACER_MAX_TRACE
7690 	free_trace_buffer(&tr->max_buffer);
7691 #endif
7692 }
7693 
7694 static void init_trace_flags_index(struct trace_array *tr)
7695 {
7696 	int i;
7697 
7698 	/* Used by the trace options files */
7699 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7700 		tr->trace_flags_index[i] = i;
7701 }
7702 
7703 static void __update_tracer_options(struct trace_array *tr)
7704 {
7705 	struct tracer *t;
7706 
7707 	for (t = trace_types; t; t = t->next)
7708 		add_tracer_options(tr, t);
7709 }
7710 
7711 static void update_tracer_options(struct trace_array *tr)
7712 {
7713 	mutex_lock(&trace_types_lock);
7714 	__update_tracer_options(tr);
7715 	mutex_unlock(&trace_types_lock);
7716 }
7717 
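/*
 * instance_mkdir() is the mkdir callback for the tracefs "instances"
 * directory (see create_trace_instances() below): making a directory
 * there (e.g. "mkdir <tracefs>/instances/foo") allocates a new,
 * independent trace_array with its own ring buffers, option files and
 * event directory.
 */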
7718 static int instance_mkdir(const char *name)
7719 {
7720 	struct trace_array *tr;
7721 	int ret;
7722 
7723 	mutex_lock(&trace_types_lock);
7724 
7725 	ret = -EEXIST;
7726 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7727 		if (tr->name && strcmp(tr->name, name) == 0)
7728 			goto out_unlock;
7729 	}
7730 
7731 	ret = -ENOMEM;
7732 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7733 	if (!tr)
7734 		goto out_unlock;
7735 
7736 	tr->name = kstrdup(name, GFP_KERNEL);
7737 	if (!tr->name)
7738 		goto out_free_tr;
7739 
7740 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7741 		goto out_free_tr;
7742 
7743 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7744 
7745 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7746 
7747 	raw_spin_lock_init(&tr->start_lock);
7748 
7749 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7750 
7751 	tr->current_trace = &nop_trace;
7752 
7753 	INIT_LIST_HEAD(&tr->systems);
7754 	INIT_LIST_HEAD(&tr->events);
7755 
7756 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7757 		goto out_free_tr;
7758 
7759 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
7760 	if (!tr->dir)
7761 		goto out_free_tr;
7762 
7763 	ret = event_trace_add_tracer(tr->dir, tr);
7764 	if (ret) {
7765 		tracefs_remove_recursive(tr->dir);
7766 		goto out_free_tr;
7767 	}
7768 
7769 	ftrace_init_trace_array(tr);
7770 
7771 	init_tracer_tracefs(tr, tr->dir);
7772 	init_trace_flags_index(tr);
7773 	__update_tracer_options(tr);
7774 
7775 	list_add(&tr->list, &ftrace_trace_arrays);
7776 
7777 	mutex_unlock(&trace_types_lock);
7778 
7779 	return 0;
7780 
7781  out_free_tr:
7782 	free_trace_buffers(tr);
7783 	free_cpumask_var(tr->tracing_cpumask);
7784 	kfree(tr->name);
7785 	kfree(tr);
7786 
7787  out_unlock:
7788 	mutex_unlock(&trace_types_lock);
7789 
7790 	return ret;
7791 
7792 }
7793 
7794 static int instance_rmdir(const char *name)
7795 {
7796 	struct trace_array *tr;
7797 	int found = 0;
7798 	int ret;
7799 	int i;
7800 
7801 	mutex_lock(&trace_types_lock);
7802 
7803 	ret = -ENODEV;
7804 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7805 		if (tr->name && strcmp(tr->name, name) == 0) {
7806 			found = 1;
7807 			break;
7808 		}
7809 	}
7810 	if (!found)
7811 		goto out_unlock;
7812 
7813 	ret = -EBUSY;
7814 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7815 		goto out_unlock;
7816 
7817 	list_del(&tr->list);
7818 
7819 	/* Disable all the flags that were enabled coming in */
7820 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7821 		if ((1 << i) & ZEROED_TRACE_FLAGS)
7822 			set_tracer_flag(tr, 1 << i, 0);
7823 	}
7824 
7825 	tracing_set_nop(tr);
7826 	clear_ftrace_function_probes(tr);
7827 	event_trace_del_tracer(tr);
7828 	ftrace_clear_pids(tr);
7829 	ftrace_destroy_function_files(tr);
7830 	tracefs_remove_recursive(tr->dir);
7831 	free_trace_buffers(tr);
7832 
7833 	for (i = 0; i < tr->nr_topts; i++) {
7834 		kfree(tr->topts[i].topts);
7835 	}
7836 	kfree(tr->topts);
7837 
7838 	free_cpumask_var(tr->tracing_cpumask);
7839 	kfree(tr->name);
7840 	kfree(tr);
7841 
7842 	ret = 0;
7843 
7844  out_unlock:
7845 	mutex_unlock(&trace_types_lock);
7846 
7847 	return ret;
7848 }
7849 
7850 static __init void create_trace_instances(struct dentry *d_tracer)
7851 {
7852 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7853 							 instance_mkdir,
7854 							 instance_rmdir);
7855 	if (WARN_ON(!trace_instance_dir))
7856 		return;
7857 }
7858 
7859 static void
7860 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7861 {
7862 	int cpu;
7863 
7864 	trace_create_file("available_tracers", 0444, d_tracer,
7865 			tr, &show_traces_fops);
7866 
7867 	trace_create_file("current_tracer", 0644, d_tracer,
7868 			tr, &set_tracer_fops);
7869 
7870 	trace_create_file("tracing_cpumask", 0644, d_tracer,
7871 			  tr, &tracing_cpumask_fops);
7872 
7873 	trace_create_file("trace_options", 0644, d_tracer,
7874 			  tr, &tracing_iter_fops);
7875 
7876 	trace_create_file("trace", 0644, d_tracer,
7877 			  tr, &tracing_fops);
7878 
7879 	trace_create_file("trace_pipe", 0444, d_tracer,
7880 			  tr, &tracing_pipe_fops);
7881 
7882 	trace_create_file("buffer_size_kb", 0644, d_tracer,
7883 			  tr, &tracing_entries_fops);
7884 
7885 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7886 			  tr, &tracing_total_entries_fops);
7887 
7888 	trace_create_file("free_buffer", 0200, d_tracer,
7889 			  tr, &tracing_free_buffer_fops);
7890 
7891 	trace_create_file("trace_marker", 0220, d_tracer,
7892 			  tr, &tracing_mark_fops);
7893 
7894 	trace_create_file("trace_marker_raw", 0220, d_tracer,
7895 			  tr, &tracing_mark_raw_fops);
7896 
7897 	trace_create_file("trace_clock", 0644, d_tracer, tr,
7898 			  &trace_clock_fops);
7899 
7900 	trace_create_file("tracing_on", 0644, d_tracer,
7901 			  tr, &rb_simple_fops);
7902 
7903 	create_trace_options_dir(tr);
7904 
7905 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7906 	trace_create_file("tracing_max_latency", 0644, d_tracer,
7907 			&tr->max_latency, &tracing_max_lat_fops);
7908 #endif
7909 
7910 	if (ftrace_create_function_files(tr, d_tracer))
7911 		WARN(1, "Could not allocate function filter files");
7912 
7913 #ifdef CONFIG_TRACER_SNAPSHOT
7914 	trace_create_file("snapshot", 0644, d_tracer,
7915 			  tr, &snapshot_fops);
7916 #endif
7917 
7918 	for_each_tracing_cpu(cpu)
7919 		tracing_init_tracefs_percpu(tr, cpu);
7920 
7921 	ftrace_init_tracefs(tr, d_tracer);
7922 }
7923 
7924 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7925 {
7926 	struct vfsmount *mnt;
7927 	struct file_system_type *type;
7928 
7929 	/*
7930 	 * To maintain backward compatibility for tools that mount
7931 	 * debugfs to get to the tracing facility, tracefs is automatically
7932 	 * mounted to the debugfs/tracing directory.
7933 	 */
7934 	type = get_fs_type("tracefs");
7935 	if (!type)
7936 		return NULL;
7937 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7938 	put_filesystem(type);
7939 	if (IS_ERR(mnt))
7940 		return NULL;
7941 	mntget(mnt);
7942 
7943 	return mnt;
7944 }
7945 
7946 /**
7947  * tracing_init_dentry - initialize top level trace array
7948  *
7949  * This is called when creating files or directories in the tracing
7950  * directory. It is called via fs_initcall() by any of the boot up code
7951  * and expects to return the dentry of the top level tracing directory.
7952  */
7953 struct dentry *tracing_init_dentry(void)
7954 {
7955 	struct trace_array *tr = &global_trace;
7956 
7957 	/* The top level trace array uses  NULL as parent */
7958 	if (tr->dir)
7959 		return NULL;
7960 
7961 	if (WARN_ON(!tracefs_initialized()) ||
7962 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
7963 		 WARN_ON(!debugfs_initialized())))
7964 		return ERR_PTR(-ENODEV);
7965 
7966 	/*
7967 	 * As there may still be users that expect the tracing
7968 	 * files to exist in debugfs/tracing, we must automount
7969 	 * the tracefs file system there, so older tools still
7970 	 * work with the newer kernel.
7971 	 */
7972 	tr->dir = debugfs_create_automount("tracing", NULL,
7973 					   trace_automount, NULL);
7974 	if (!tr->dir) {
7975 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
7976 		return ERR_PTR(-ENOMEM);
7977 	}
7978 
7979 	return NULL;
7980 }
7981 
7982 extern struct trace_eval_map *__start_ftrace_eval_maps[];
7983 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
7984 
7985 static void __init trace_eval_init(void)
7986 {
7987 	int len;
7988 
7989 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
7990 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
7991 }
7992 
7993 #ifdef CONFIG_MODULES
7994 static void trace_module_add_evals(struct module *mod)
7995 {
7996 	if (!mod->num_trace_evals)
7997 		return;
7998 
7999 	/*
8000 	 * Modules with bad taint do not have events created; do
8001 	 * not bother with their enums (eval maps) either.
8002 	 */
8003 	if (trace_module_has_bad_taint(mod))
8004 		return;
8005 
8006 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8007 }
8008 
8009 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
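/*
 * Saved eval maps are kept in blocks on the trace_eval_maps list: a
 * "head" item recording the owning module, followed by the map entries
 * themselves, ending with a "tail" item whose ->next points to the next
 * module's block (trace_eval_jmp_to_tail() skips from head to tail).
 * On module unload, unlink and free the block belonging to that module.
 */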
8010 static void trace_module_remove_evals(struct module *mod)
8011 {
8012 	union trace_eval_map_item *map;
8013 	union trace_eval_map_item **last = &trace_eval_maps;
8014 
8015 	if (!mod->num_trace_evals)
8016 		return;
8017 
8018 	mutex_lock(&trace_eval_mutex);
8019 
8020 	map = trace_eval_maps;
8021 
8022 	while (map) {
8023 		if (map->head.mod == mod)
8024 			break;
8025 		map = trace_eval_jmp_to_tail(map);
8026 		last = &map->tail.next;
8027 		map = map->tail.next;
8028 	}
8029 	if (!map)
8030 		goto out;
8031 
8032 	*last = trace_eval_jmp_to_tail(map)->tail.next;
8033 	kfree(map);
8034  out:
8035 	mutex_unlock(&trace_eval_mutex);
8036 }
8037 #else
8038 static inline void trace_module_remove_evals(struct module *mod) { }
8039 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8040 
8041 static int trace_module_notify(struct notifier_block *self,
8042 			       unsigned long val, void *data)
8043 {
8044 	struct module *mod = data;
8045 
8046 	switch (val) {
8047 	case MODULE_STATE_COMING:
8048 		trace_module_add_evals(mod);
8049 		break;
8050 	case MODULE_STATE_GOING:
8051 		trace_module_remove_evals(mod);
8052 		break;
8053 	}
8054 
8055 	return 0;
8056 }
8057 
8058 static struct notifier_block trace_module_nb = {
8059 	.notifier_call = trace_module_notify,
8060 	.priority = 0,
8061 };
8062 #endif /* CONFIG_MODULES */
8063 
8064 static __init int tracer_init_tracefs(void)
8065 {
8066 	struct dentry *d_tracer;
8067 
8068 	trace_access_lock_init();
8069 
8070 	d_tracer = tracing_init_dentry();
8071 	if (IS_ERR(d_tracer))
8072 		return 0;
8073 
8074 	init_tracer_tracefs(&global_trace, d_tracer);
8075 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8076 
8077 	trace_create_file("tracing_thresh", 0644, d_tracer,
8078 			&global_trace, &tracing_thresh_fops);
8079 
8080 	trace_create_file("README", 0444, d_tracer,
8081 			NULL, &tracing_readme_fops);
8082 
8083 	trace_create_file("saved_cmdlines", 0444, d_tracer,
8084 			NULL, &tracing_saved_cmdlines_fops);
8085 
8086 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8087 			  NULL, &tracing_saved_cmdlines_size_fops);
8088 
8089 	trace_create_file("saved_tgids", 0444, d_tracer,
8090 			NULL, &tracing_saved_tgids_fops);
8091 
8092 	trace_eval_init();
8093 
8094 	trace_create_eval_file(d_tracer);
8095 
8096 #ifdef CONFIG_MODULES
8097 	register_module_notifier(&trace_module_nb);
8098 #endif
8099 
8100 #ifdef CONFIG_DYNAMIC_FTRACE
8101 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8102 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8103 #endif
8104 
8105 	create_trace_instances(d_tracer);
8106 
8107 	update_tracer_options(&global_trace);
8108 
8109 	return 0;
8110 }
8111 
8112 static int trace_panic_handler(struct notifier_block *this,
8113 			       unsigned long event, void *unused)
8114 {
8115 	if (ftrace_dump_on_oops)
8116 		ftrace_dump(ftrace_dump_on_oops);
8117 	return NOTIFY_OK;
8118 }
8119 
8120 static struct notifier_block trace_panic_notifier = {
8121 	.notifier_call  = trace_panic_handler,
8122 	.next           = NULL,
8123 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
8124 };
8125 
8126 static int trace_die_handler(struct notifier_block *self,
8127 			     unsigned long val,
8128 			     void *data)
8129 {
8130 	switch (val) {
8131 	case DIE_OOPS:
8132 		if (ftrace_dump_on_oops)
8133 			ftrace_dump(ftrace_dump_on_oops);
8134 		break;
8135 	default:
8136 		break;
8137 	}
8138 	return NOTIFY_OK;
8139 }
8140 
8141 static struct notifier_block trace_die_notifier = {
8142 	.notifier_call = trace_die_handler,
8143 	.priority = 200
8144 };
8145 
8146 /*
8147  * printk is set to a max of 1024; we really don't need it that big.
8148  * Nothing should be printing 1000 characters anyway.
8149  */
8150 #define TRACE_MAX_PRINT		1000
8151 
8152 /*
8153  * Define here KERN_TRACE so that we have one place to modify
8154  * it if we decide to change what log level the ftrace dump
8155  * should be at.
8156  */
8157 #define KERN_TRACE		KERN_EMERG
8158 
8159 void
8160 trace_printk_seq(struct trace_seq *s)
8161 {
8162 	/* Probably should print a warning here. */
8163 	if (s->seq.len >= TRACE_MAX_PRINT)
8164 		s->seq.len = TRACE_MAX_PRINT;
8165 
8166 	/*
8167 	 * More paranoid code. Although the buffer size is set to
8168 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8169 	 * an extra layer of protection.
8170 	 */
8171 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8172 		s->seq.len = s->seq.size - 1;
8173 
8174 	/* should be zero ended, but we are paranoid. */
8175 	s->buffer[s->seq.len] = 0;
8176 
8177 	printk(KERN_TRACE "%s", s->buffer);
8178 
8179 	trace_seq_init(s);
8180 }
8181 
8182 void trace_init_global_iter(struct trace_iterator *iter)
8183 {
8184 	iter->tr = &global_trace;
8185 	iter->trace = iter->tr->current_trace;
8186 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
8187 	iter->trace_buffer = &global_trace.trace_buffer;
8188 
8189 	if (iter->trace && iter->trace->open)
8190 		iter->trace->open(iter);
8191 
8192 	/* Annotate start of buffers if we had overruns */
8193 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
8194 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
8195 
8196 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
8197 	if (trace_clocks[iter->tr->clock_id].in_ns)
8198 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8199 }
8200 
8201 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8202 {
8203 	/* use static because iter can be a bit big for the stack */
8204 	static struct trace_iterator iter;
8205 	static atomic_t dump_running;
8206 	struct trace_array *tr = &global_trace;
8207 	unsigned int old_userobj;
8208 	unsigned long flags;
8209 	int cnt = 0, cpu;
8210 
8211 	/* Only allow one dump user at a time. */
8212 	if (atomic_inc_return(&dump_running) != 1) {
8213 		atomic_dec(&dump_running);
8214 		return;
8215 	}
8216 
8217 	/*
8218 	 * Always turn off tracing when we dump.
8219 	 * We don't need to show trace output of what happens
8220 	 * between multiple crashes.
8221 	 *
8222 	 * If the user does a sysrq-z, then they can re-enable
8223 	 * tracing with echo 1 > tracing_on.
8224 	 */
8225 	tracing_off();
8226 
8227 	local_irq_save(flags);
8228 	printk_nmi_direct_enter();
8229 
8230 	/* Simulate the iterator */
8231 	trace_init_global_iter(&iter);
8232 
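	/*
	 * Bump the per-CPU "disabled" counters so that no new events get
	 * recorded into the buffers while we walk and print them.
	 */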
8233 	for_each_tracing_cpu(cpu) {
8234 		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8235 	}
8236 
8237 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8238 
8239 	/* don't look at user memory in panic mode */
8240 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8241 
8242 	switch (oops_dump_mode) {
8243 	case DUMP_ALL:
8244 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8245 		break;
8246 	case DUMP_ORIG:
8247 		iter.cpu_file = raw_smp_processor_id();
8248 		break;
8249 	case DUMP_NONE:
8250 		goto out_enable;
8251 	default:
8252 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8253 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
8254 	}
8255 
8256 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
8257 
8258 	/* Did function tracer already get disabled? */
8259 	if (ftrace_is_dead()) {
8260 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8261 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8262 	}
8263 
8264 	/*
8265 	 * We need to stop all tracing on all CPUs to read
8266 	 * the next buffer. This is a bit expensive, but is
8267 	 * not done often. We fill in all that we can read,
8268 	 * and then release the locks again.
8269 	 */
8270 
8271 	while (!trace_empty(&iter)) {
8272 
8273 		if (!cnt)
8274 			printk(KERN_TRACE "---------------------------------\n");
8275 
8276 		cnt++;
8277 
8278 		trace_iterator_reset(&iter);
8279 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
8280 
8281 		if (trace_find_next_entry_inc(&iter) != NULL) {
8282 			int ret;
8283 
8284 			ret = print_trace_line(&iter);
8285 			if (ret != TRACE_TYPE_NO_CONSUME)
8286 				trace_consume(&iter);
8287 		}
8288 		touch_nmi_watchdog();
8289 
8290 		trace_printk_seq(&iter.seq);
8291 	}
8292 
8293 	if (!cnt)
8294 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
8295 	else
8296 		printk(KERN_TRACE "---------------------------------\n");
8297 
8298  out_enable:
8299 	tr->trace_flags |= old_userobj;
8300 
8301 	for_each_tracing_cpu(cpu) {
8302 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8303 	}
8304 	atomic_dec(&dump_running);
8305 	printk_nmi_direct_exit();
8306 	local_irq_restore(flags);
8307 }
8308 EXPORT_SYMBOL_GPL(ftrace_dump);
8309 
8310 __init static int tracer_alloc_buffers(void)
8311 {
8312 	int ring_buf_size;
8313 	int ret = -ENOMEM;
8314 
8315 	/*
8316 	 * Make sure we don't accidentally add more trace options
8317 	 * than we have bits for.
8318 	 */
8319 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8320 
8321 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8322 		goto out;
8323 
8324 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8325 		goto out_free_buffer_mask;
8326 
8327 	/* Only allocate trace_printk buffers if a trace_printk exists */
8328 	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8329 		/* Must be called before global_trace.buffer is allocated */
8330 		trace_printk_init_buffers();
8331 
8332 	/* To save memory, keep the ring buffer size to its minimum */
8333 	if (ring_buffer_expanded)
8334 		ring_buf_size = trace_buf_size;
8335 	else
8336 		ring_buf_size = 1;
8337 
8338 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8339 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8340 
8341 	raw_spin_lock_init(&global_trace.start_lock);
8342 
8343 	/*
8344 	 * The prepare callback allocates some memory for the ring buffer. We
8345 	 * don't free the buffer if the CPU goes down. If we were to free
8346 	 * the buffer, then the user would lose any trace that was in the
8347 	 * buffer. The memory will be removed once the "instance" is removed.
8348 	 */
8349 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8350 				      "trace/RB:prepare", trace_rb_cpu_prepare,
8351 				      NULL);
8352 	if (ret < 0)
8353 		goto out_free_cpumask;
8354 	/* Used for event triggers */
8355 	ret = -ENOMEM;
8356 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8357 	if (!temp_buffer)
8358 		goto out_rm_hp_state;
8359 
8360 	if (trace_create_savedcmd() < 0)
8361 		goto out_free_temp_buffer;
8362 
8363 	/* TODO: make the number of buffers hot pluggable with CPUS */
8364 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8365 		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8366 		WARN_ON(1);
8367 		goto out_free_savedcmd;
8368 	}
8369 
8370 	if (global_trace.buffer_disabled)
8371 		tracing_off();
8372 
8373 	if (trace_boot_clock) {
8374 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
8375 		if (ret < 0)
8376 			pr_warn("Trace clock %s not defined, going back to default\n",
8377 				trace_boot_clock);
8378 	}
8379 
8380 	/*
8381 	 * register_tracer() might reference current_trace, so it
8382 	 * needs to be set before we register anything. This is
8383 	 * just a bootstrap of current_trace anyway.
8384 	 */
8385 	global_trace.current_trace = &nop_trace;
8386 
8387 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8388 
8389 	ftrace_init_global_array_ops(&global_trace);
8390 
8391 	init_trace_flags_index(&global_trace);
8392 
8393 	register_tracer(&nop_trace);
8394 
8395 	/* Function tracing may start here (via kernel command line) */
8396 	init_function_trace();
8397 
8398 	/* All seems OK, enable tracing */
8399 	tracing_disabled = 0;
8400 
8401 	atomic_notifier_chain_register(&panic_notifier_list,
8402 				       &trace_panic_notifier);
8403 
8404 	register_die_notifier(&trace_die_notifier);
8405 
8406 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8407 
8408 	INIT_LIST_HEAD(&global_trace.systems);
8409 	INIT_LIST_HEAD(&global_trace.events);
8410 	list_add(&global_trace.list, &ftrace_trace_arrays);
8411 
8412 	apply_trace_boot_options();
8413 
8414 	register_snapshot_cmd();
8415 
8416 	return 0;
8417 
8418 out_free_savedcmd:
8419 	free_saved_cmdlines_buffer(savedcmd);
8420 out_free_temp_buffer:
8421 	ring_buffer_free(temp_buffer);
8422 out_rm_hp_state:
8423 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8424 out_free_cpumask:
8425 	free_cpumask_var(global_trace.tracing_cpumask);
8426 out_free_buffer_mask:
8427 	free_cpumask_var(tracing_buffer_mask);
8428 out:
8429 	return ret;
8430 }
8431 
8432 void __init early_trace_init(void)
8433 {
8434 	if (tracepoint_printk) {
8435 		tracepoint_print_iter =
8436 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8437 		if (WARN_ON(!tracepoint_print_iter))
8438 			tracepoint_printk = 0;
8439 		else
8440 			static_key_enable(&tracepoint_printk_key.key);
8441 	}
8442 	tracer_alloc_buffers();
8443 }
8444 
8445 void __init trace_init(void)
8446 {
8447 	trace_event_init();
8448 }
8449 
8450 __init static int clear_boot_tracer(void)
8451 {
8452 	/*
8453 	 * The buffer holding the default boot-up tracer name lives in an
8454 	 * init section. This function is called as a late initcall; if the
8455 	 * requested boot tracer was never registered, clear the pointer out
8456 	 * to prevent a later registration from accessing the buffer that is
8457 	 * about to be freed.
8458 	 */
8459 	if (!default_bootup_tracer)
8460 		return 0;
8461 
8462 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8463 	       default_bootup_tracer);
8464 	default_bootup_tracer = NULL;
8465 
8466 	return 0;
8467 }
8468 
8469 fs_initcall(tracer_init_tracefs);
8470 late_initcall_sync(clear_boot_tracer);
8471