1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/kprobes.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/poll.h>
42 #include <linux/nmi.h>
43 #include <linux/fs.h>
44 #include <linux/sched/rt.h>
45 
46 #include "trace.h"
47 #include "trace_output.h"
48 
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54 
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will look into the ring buffer to count the
58  * entries inserted during the selftest, although concurrent
59  * insertions into the ring buffer, such as trace_printk, could occur
60  * at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63 
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68 
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 
73 /* For tracers that don't implement custom flags */
74 static struct tracer_opt dummy_tracer_opt[] = {
75 	{ }
76 };
77 
78 static struct tracer_flags dummy_tracer_flags = {
79 	.val = 0,
80 	.opts = dummy_tracer_opt
81 };
82 
83 static int
84 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
85 {
86 	return 0;
87 }
88 
89 /*
90  * To prevent the comm cache from being overwritten when no
91  * tracing is active, only save the comm when a trace event
92  * occurred.
93  */
94 static DEFINE_PER_CPU(bool, trace_cmdline_save);
95 
96 /*
97  * Kill all tracing for good (never come back).
98  * It is initialized to 1 but will be set to zero if the initialization
99  * of the tracer is successful. But that is the only place that sets
100  * this back to zero.
101  */
102 static int tracing_disabled = 1;
103 
104 cpumask_var_t __read_mostly	tracing_buffer_mask;
105 
106 /*
107  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
108  *
109  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
110  * is set, then ftrace_dump is called. This will output the contents
111  * of the ftrace buffers to the console.  This is very useful for
112  * capturing traces that lead to crashes and outputting them to a
113  * serial console.
114  *
115  * It is off by default, but you can enable it either by specifying
116  * "ftrace_dump_on_oops" on the kernel command line, or by setting
117  * /proc/sys/kernel/ftrace_dump_on_oops.
118  * Set it to 1 to dump the buffers of all CPUs.
119  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
120  */
121 
122 enum ftrace_dump_mode ftrace_dump_on_oops;
123 
124 /* When set, tracing will stop when a WARN*() is hit */
125 int __disable_trace_on_warning;
126 
127 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
128 /* Map of enums to their values, for "enum_map" file */
129 struct trace_enum_map_head {
130 	struct module			*mod;
131 	unsigned long			length;
132 };
133 
134 union trace_enum_map_item;
135 
136 struct trace_enum_map_tail {
137 	/*
138 	 * "end" is first and points to NULL as it must be different
139 	 * than "mod" or "enum_string"
140 	 */
141 	union trace_enum_map_item	*next;
142 	const char			*end;	/* points to NULL */
143 };
144 
145 static DEFINE_MUTEX(trace_enum_mutex);
146 
147 /*
148  * The trace_enum_maps are saved in an array with two extra elements,
149  * one at the beginning, and one at the end. The beginning item contains
150  * the count of the saved maps (head.length), and the module they
151  * belong to if not built in (head.mod). The ending item contains a
152  * pointer to the next array of saved enum_map items.
153  */
154 union trace_enum_map_item {
155 	struct trace_enum_map		map;
156 	struct trace_enum_map_head	head;
157 	struct trace_enum_map_tail	tail;
158 };
159 
160 static union trace_enum_map_item *trace_enum_maps;
161 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
162 
163 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
164 
165 #define MAX_TRACER_SIZE		100
166 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
167 static char *default_bootup_tracer;
168 
169 static bool allocate_snapshot;
170 
171 static int __init set_cmdline_ftrace(char *str)
172 {
173 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
174 	default_bootup_tracer = bootup_tracer_buf;
175 	/* We are using ftrace early, expand it */
176 	ring_buffer_expanded = true;
177 	return 1;
178 }
179 __setup("ftrace=", set_cmdline_ftrace);
180 
181 static int __init set_ftrace_dump_on_oops(char *str)
182 {
183 	if (*str++ != '=' || !*str) {
184 		ftrace_dump_on_oops = DUMP_ALL;
185 		return 1;
186 	}
187 
188 	if (!strcmp("orig_cpu", str)) {
189 		ftrace_dump_on_oops = DUMP_ORIG;
190 		return 1;
191 	}
192 
193 	return 0;
194 }
195 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
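/*
 * Illustrative usage (not part of the original source), based on the
 * option parsing above and the ftrace_dump_on_oops comment earlier:
 *
 *	ftrace_dump_on_oops				boot: dump all CPU buffers
 *	ftrace_dump_on_oops=orig_cpu			boot: dump only the oops CPU
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops	run time: dump all CPU buffers
 */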
196 
197 static int __init stop_trace_on_warning(char *str)
198 {
199 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
200 		__disable_trace_on_warning = 1;
201 	return 1;
202 }
203 __setup("traceoff_on_warning", stop_trace_on_warning);
204 
205 static int __init boot_alloc_snapshot(char *str)
206 {
207 	allocate_snapshot = true;
208 	/* We also need the main ring buffer expanded */
209 	ring_buffer_expanded = true;
210 	return 1;
211 }
212 __setup("alloc_snapshot", boot_alloc_snapshot);
213 
214 
215 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
216 
217 static int __init set_trace_boot_options(char *str)
218 {
219 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
220 	return 0;
221 }
222 __setup("trace_options=", set_trace_boot_options);
223 
224 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
225 static char *trace_boot_clock __initdata;
226 
227 static int __init set_trace_boot_clock(char *str)
228 {
229 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
230 	trace_boot_clock = trace_boot_clock_buf;
231 	return 0;
232 }
233 __setup("trace_clock=", set_trace_boot_clock);
234 
235 static int __init set_tracepoint_printk(char *str)
236 {
237 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
238 		tracepoint_printk = 1;
239 	return 1;
240 }
241 __setup("tp_printk", set_tracepoint_printk);
242 
243 unsigned long long ns2usecs(cycle_t nsec)
244 {
245 	nsec += 500;
246 	do_div(nsec, 1000);
247 	return nsec;
248 }
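/*
 * Worked example (not part of the original source): ns2usecs() rounds to
 * the nearest microsecond, so 1499 ns -> 1 us and 1500 ns -> 2 us, while
 * nsecs_to_usecs() further below simply truncates.
 */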
249 
250 /* trace_flags holds trace_options default values */
251 #define TRACE_DEFAULT_FLAGS						\
252 	(FUNCTION_DEFAULT_FLAGS |					\
253 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
254 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
255 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
256 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
257 
258 /* trace_options that are only supported by global_trace */
259 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
260 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
261 
262 
263 /*
264  * The global_trace is the descriptor that holds the tracing
265  * buffers for the live tracing. For each CPU, it contains
266  * a linked list of pages that will store trace entries. The
267  * page descriptor of the pages in memory is used to hold
268  * the linked list by linking the lru item in the page descriptor
269  * to each of the pages in the buffer per CPU.
270  *
271  * For each active CPU there is a data field that holds the
272  * pages for the buffer for that CPU. Each CPU has the same number
273  * of pages allocated for its buffer.
274  */
275 static struct trace_array global_trace = {
276 	.trace_flags = TRACE_DEFAULT_FLAGS,
277 };
278 
279 LIST_HEAD(ftrace_trace_arrays);
280 
281 int trace_array_get(struct trace_array *this_tr)
282 {
283 	struct trace_array *tr;
284 	int ret = -ENODEV;
285 
286 	mutex_lock(&trace_types_lock);
287 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
288 		if (tr == this_tr) {
289 			tr->ref++;
290 			ret = 0;
291 			break;
292 		}
293 	}
294 	mutex_unlock(&trace_types_lock);
295 
296 	return ret;
297 }
298 
299 static void __trace_array_put(struct trace_array *this_tr)
300 {
301 	WARN_ON(!this_tr->ref);
302 	this_tr->ref--;
303 }
304 
305 void trace_array_put(struct trace_array *this_tr)
306 {
307 	mutex_lock(&trace_types_lock);
308 	__trace_array_put(this_tr);
309 	mutex_unlock(&trace_types_lock);
310 }
311 
312 int filter_check_discard(struct trace_event_file *file, void *rec,
313 			 struct ring_buffer *buffer,
314 			 struct ring_buffer_event *event)
315 {
316 	if (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
317 	    !filter_match_preds(file->filter, rec)) {
318 		ring_buffer_discard_commit(buffer, event);
319 		return 1;
320 	}
321 
322 	return 0;
323 }
324 EXPORT_SYMBOL_GPL(filter_check_discard);
325 
326 int call_filter_check_discard(struct trace_event_call *call, void *rec,
327 			      struct ring_buffer *buffer,
328 			      struct ring_buffer_event *event)
329 {
330 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
331 	    !filter_match_preds(call->filter, rec)) {
332 		ring_buffer_discard_commit(buffer, event);
333 		return 1;
334 	}
335 
336 	return 0;
337 }
338 EXPORT_SYMBOL_GPL(call_filter_check_discard);
339 
340 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
341 {
342 	u64 ts;
343 
344 	/* Early boot up does not have a buffer yet */
345 	if (!buf->buffer)
346 		return trace_clock_local();
347 
348 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
349 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
350 
351 	return ts;
352 }
353 
354 cycle_t ftrace_now(int cpu)
355 {
356 	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
357 }
358 
359 /**
360  * tracing_is_enabled - Show if global_trace has been disabled
361  *
362  * Shows if the global trace has been enabled or not. It uses the
363  * mirror flag "buffer_disabled", which can be used in fast paths such
364  * as the irqsoff tracer. But it may be inaccurate due to races. If you
365  * need to know the accurate state, use tracing_is_on() which is a little
366  * slower, but accurate.
367  */
368 int tracing_is_enabled(void)
369 {
370 	/*
371 	 * For quick access (irqsoff uses this in fast path), just
372 	 * return the mirror variable of the state of the ring buffer.
373 	 * It's a little racy, but we don't really care.
374 	 */
375 	smp_rmb();
376 	return !global_trace.buffer_disabled;
377 }
378 
379 /*
380  * trace_buf_size is the size in bytes that is allocated
381  * for a buffer. Note, the number of bytes is always rounded
382  * to page size.
383  *
384  * This number is purposely set to a low value of 16384 entries.
385  * If a dump on oops happens, it is much appreciated not to have
386  * to wait for all that output. Anyway, this is configurable at
387  * both boot time and run time.
388  */
389 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
390 
391 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
392 
393 /* trace_types holds a linked list of available tracers. */
394 static struct tracer		*trace_types __read_mostly;
395 
396 /*
397  * trace_types_lock is used to protect the trace_types list.
398  */
399 DEFINE_MUTEX(trace_types_lock);
400 
401 /*
402  * serialize access to the ring buffer
403  *
404  * The ring buffer serializes readers, but that is only low-level
405  * protection. The validity of the events (returned by ring_buffer_peek()
406  * etc.) is not protected by the ring buffer.
407  *
408  * The content of events may become garbage if we allow other processes
409  * to consume these events concurrently:
410  *   A) the page of the consumed events may become a normal page
411  *      (not a reader page) in the ring buffer, and this page will be
412  *      rewritten by the event producer.
413  *   B) the page of the consumed events may become a page for splice_read,
414  *      and this page will be returned to the system.
415  *
416  * These primitives allow multiple processes to access different per-CPU
417  * ring buffers concurrently.
418  *
419  * These primitives don't distinguish read-only and read-consume access.
420  * Multiple read-only accesses are also serialized.
421  */
422 
423 #ifdef CONFIG_SMP
424 static DECLARE_RWSEM(all_cpu_access_lock);
425 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
426 
427 static inline void trace_access_lock(int cpu)
428 {
429 	if (cpu == RING_BUFFER_ALL_CPUS) {
430 		/* gain it for accessing the whole ring buffer. */
431 		down_write(&all_cpu_access_lock);
432 	} else {
433 		/* gain it for accessing a cpu ring buffer. */
434 
435 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
436 		down_read(&all_cpu_access_lock);
437 
438 		/* Secondly block other access to this @cpu ring buffer. */
439 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
440 	}
441 }
442 
443 static inline void trace_access_unlock(int cpu)
444 {
445 	if (cpu == RING_BUFFER_ALL_CPUS) {
446 		up_write(&all_cpu_access_lock);
447 	} else {
448 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
449 		up_read(&all_cpu_access_lock);
450 	}
451 }
452 
453 static inline void trace_access_lock_init(void)
454 {
455 	int cpu;
456 
457 	for_each_possible_cpu(cpu)
458 		mutex_init(&per_cpu(cpu_access_lock, cpu));
459 }
460 
461 #else
462 
463 static DEFINE_MUTEX(access_lock);
464 
465 static inline void trace_access_lock(int cpu)
466 {
467 	(void)cpu;
468 	mutex_lock(&access_lock);
469 }
470 
471 static inline void trace_access_unlock(int cpu)
472 {
473 	(void)cpu;
474 	mutex_unlock(&access_lock);
475 }
476 
477 static inline void trace_access_lock_init(void)
478 {
479 }
480 
481 #endif
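/*
 * Illustrative sketch (not part of the original source): readers later in
 * this file pair these helpers around touching a buffer, roughly:
 *
 *	trace_access_lock(iter->cpu_file);
 *	... read or consume events for that cpu (or all cpus) ...
 *	trace_access_unlock(iter->cpu_file);
 */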
482 
483 #ifdef CONFIG_STACKTRACE
484 static void __ftrace_trace_stack(struct ring_buffer *buffer,
485 				 unsigned long flags,
486 				 int skip, int pc, struct pt_regs *regs);
487 static inline void ftrace_trace_stack(struct trace_array *tr,
488 				      struct ring_buffer *buffer,
489 				      unsigned long flags,
490 				      int skip, int pc, struct pt_regs *regs);
491 
492 #else
493 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
494 					unsigned long flags,
495 					int skip, int pc, struct pt_regs *regs)
496 {
497 }
498 static inline void ftrace_trace_stack(struct trace_array *tr,
499 				      struct ring_buffer *buffer,
500 				      unsigned long flags,
501 				      int skip, int pc, struct pt_regs *regs)
502 {
503 }
504 
505 #endif
506 
507 static void tracer_tracing_on(struct trace_array *tr)
508 {
509 	if (tr->trace_buffer.buffer)
510 		ring_buffer_record_on(tr->trace_buffer.buffer);
511 	/*
512 	 * This flag is looked at when buffers haven't been allocated
513 	 * yet, or by some tracers (like irqsoff), that just want to
514 	 * know if the ring buffer has been disabled, but it can handle
515 	 * races of where it gets disabled but we still do a record.
516 	 * races where it gets disabled but we still do a record.
517 	 * important to be fast than accurate.
518 	 */
519 	tr->buffer_disabled = 0;
520 	/* Make the flag seen by readers */
521 	smp_wmb();
522 }
523 
524 /**
525  * tracing_on - enable tracing buffers
526  *
527  * This function enables tracing buffers that may have been
528  * disabled with tracing_off.
529  */
530 void tracing_on(void)
531 {
532 	tracer_tracing_on(&global_trace);
533 }
534 EXPORT_SYMBOL_GPL(tracing_on);
535 
536 /**
537  * __trace_puts - write a constant string into the trace buffer.
538  * @ip:	   The address of the caller
539  * @str:   The constant string to write
540  * @size:  The size of the string.
541  */
542 int __trace_puts(unsigned long ip, const char *str, int size)
543 {
544 	struct ring_buffer_event *event;
545 	struct ring_buffer *buffer;
546 	struct print_entry *entry;
547 	unsigned long irq_flags;
548 	int alloc;
549 	int pc;
550 
551 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
552 		return 0;
553 
554 	pc = preempt_count();
555 
556 	if (unlikely(tracing_selftest_running || tracing_disabled))
557 		return 0;
558 
559 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
560 
561 	local_save_flags(irq_flags);
562 	buffer = global_trace.trace_buffer.buffer;
563 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
564 					  irq_flags, pc);
565 	if (!event)
566 		return 0;
567 
568 	entry = ring_buffer_event_data(event);
569 	entry->ip = ip;
570 
571 	memcpy(&entry->buf, str, size);
572 
573 	/* Add a newline if necessary */
574 	if (entry->buf[size - 1] != '\n') {
575 		entry->buf[size] = '\n';
576 		entry->buf[size + 1] = '\0';
577 	} else
578 		entry->buf[size] = '\0';
579 
580 	__buffer_unlock_commit(buffer, event);
581 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
582 
583 	return size;
584 }
585 EXPORT_SYMBOL_GPL(__trace_puts);
586 
587 /**
588  * __trace_bputs - write the pointer to a constant string into trace buffer
589  * @ip:	   The address of the caller
590  * @str:   The constant string to write to the buffer
591  */
592 int __trace_bputs(unsigned long ip, const char *str)
593 {
594 	struct ring_buffer_event *event;
595 	struct ring_buffer *buffer;
596 	struct bputs_entry *entry;
597 	unsigned long irq_flags;
598 	int size = sizeof(struct bputs_entry);
599 	int pc;
600 
601 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
602 		return 0;
603 
604 	pc = preempt_count();
605 
606 	if (unlikely(tracing_selftest_running || tracing_disabled))
607 		return 0;
608 
609 	local_save_flags(irq_flags);
610 	buffer = global_trace.trace_buffer.buffer;
611 	event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
612 					  irq_flags, pc);
613 	if (!event)
614 		return 0;
615 
616 	entry = ring_buffer_event_data(event);
617 	entry->ip			= ip;
618 	entry->str			= str;
619 
620 	__buffer_unlock_commit(buffer, event);
621 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
622 
623 	return 1;
624 }
625 EXPORT_SYMBOL_GPL(__trace_bputs);
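/*
 * Illustrative note (not part of the original source): callers are normally
 * expected to reach these through the trace_puts() macro, which should pick
 * __trace_bputs() for string literals and fall back to __trace_puts()
 * otherwise, e.g.:
 *
 *	trace_puts("reached checkpoint A\n");
 */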
626 
627 #ifdef CONFIG_TRACER_SNAPSHOT
628 /**
629  * tracing_snapshot - take a snapshot of the current buffer.
630  *
631  * This causes a swap between the snapshot buffer and the current live
632  * tracing buffer. You can use this to take snapshots of the live
633  * trace when some condition is triggered, but continue to trace.
634  *
635  * Note, make sure to allocate the snapshot either with
636  * tracing_snapshot_alloc(), or manually with:
637  * echo 1 > /sys/kernel/debug/tracing/snapshot
638  *
639  * If the snapshot buffer is not allocated, it will stop tracing.
640  * Basically making a permanent snapshot.
641  */
642 void tracing_snapshot(void)
643 {
644 	struct trace_array *tr = &global_trace;
645 	struct tracer *tracer = tr->current_trace;
646 	unsigned long flags;
647 
648 	if (in_nmi()) {
649 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
650 		internal_trace_puts("*** snapshot is being ignored        ***\n");
651 		return;
652 	}
653 
654 	if (!tr->allocated_snapshot) {
655 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
656 		internal_trace_puts("*** stopping trace here!   ***\n");
657 		tracing_off();
658 		return;
659 	}
660 
661 	/* Note, snapshot can not be used when the tracer uses it */
662 	if (tracer->use_max_tr) {
663 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
664 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
665 		return;
666 	}
667 
668 	local_irq_save(flags);
669 	update_max_tr(tr, current, smp_processor_id());
670 	local_irq_restore(flags);
671 }
672 EXPORT_SYMBOL_GPL(tracing_snapshot);
673 
674 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
675 					struct trace_buffer *size_buf, int cpu_id);
676 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
677 
678 static int alloc_snapshot(struct trace_array *tr)
679 {
680 	int ret;
681 
682 	if (!tr->allocated_snapshot) {
683 
684 		/* allocate spare buffer */
685 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
686 				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
687 		if (ret < 0)
688 			return ret;
689 
690 		tr->allocated_snapshot = true;
691 	}
692 
693 	return 0;
694 }
695 
696 static void free_snapshot(struct trace_array *tr)
697 {
698 	/*
699 	 * We don't free the ring buffer; instead, we resize it, because
700 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
701 	 * we want to preserve it.
702 	 */
703 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
704 	set_buffer_entries(&tr->max_buffer, 1);
705 	tracing_reset_online_cpus(&tr->max_buffer);
706 	tr->allocated_snapshot = false;
707 }
708 
709 /**
710  * tracing_alloc_snapshot - allocate snapshot buffer.
711  *
712  * This only allocates the snapshot buffer if it isn't already
713  * allocated - it doesn't also take a snapshot.
714  *
715  * This is meant to be used in cases where the snapshot buffer needs
716  * to be set up for events that can't sleep but need to be able to
717  * trigger a snapshot.
718  */
719 int tracing_alloc_snapshot(void)
720 {
721 	struct trace_array *tr = &global_trace;
722 	int ret;
723 
724 	ret = alloc_snapshot(tr);
725 	WARN_ON(ret < 0);
726 
727 	return ret;
728 }
729 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
730 
731 /**
732  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
733  *
734  * This is similar to tracing_snapshot(), but it will allocate the
735  * snapshot buffer if it isn't already allocated. Use this only
736  * where it is safe to sleep, as the allocation may sleep.
737  *
738  * This causes a swap between the snapshot buffer and the current live
739  * tracing buffer. You can use this to take snapshots of the live
740  * trace when some condition is triggered, but continue to trace.
741  */
742 void tracing_snapshot_alloc(void)
743 {
744 	int ret;
745 
746 	ret = tracing_alloc_snapshot();
747 	if (ret < 0)
748 		return;
749 
750 	tracing_snapshot();
751 }
752 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
753 #else
754 void tracing_snapshot(void)
755 {
756 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
757 }
758 EXPORT_SYMBOL_GPL(tracing_snapshot);
759 int tracing_alloc_snapshot(void)
760 {
761 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
762 	return -ENODEV;
763 }
764 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
765 void tracing_snapshot_alloc(void)
766 {
767 	/* Give warning */
768 	tracing_snapshot();
769 }
770 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
771 #endif /* CONFIG_TRACER_SNAPSHOT */
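/*
 * Illustrative sketch (not part of the original source) of how the snapshot
 * API above might be used; my_rare_condition() is hypothetical:
 *
 *	Sleepable context - allocate and snapshot in one call:
 *		if (my_rare_condition())
 *			tracing_snapshot_alloc();
 *
 *	Atomic fast path - pre-allocate once, then snapshot later:
 *		tracing_alloc_snapshot();
 *		...
 *		tracing_snapshot();
 */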
772 
773 static void tracer_tracing_off(struct trace_array *tr)
774 {
775 	if (tr->trace_buffer.buffer)
776 		ring_buffer_record_off(tr->trace_buffer.buffer);
777 	/*
778 	 * This flag is looked at when buffers haven't been allocated
779 	 * yet, or by some tracers (like irqsoff), that just want to
780 	 * know if the ring buffer has been disabled, but it can handle
781 	 * races where it gets disabled but we still do a record.
782 	 * As the check is in the fast path of the tracers, it is more
783 	 * important to be fast than accurate.
784 	 */
785 	tr->buffer_disabled = 1;
786 	/* Make the flag seen by readers */
787 	smp_wmb();
788 }
789 
790 /**
791  * tracing_off - turn off tracing buffers
792  *
793  * This function stops the tracing buffers from recording data.
794  * It does not disable any overhead the tracers themselves may
795  * be causing. This function simply causes all recording to
796  * the ring buffers to fail.
797  */
798 void tracing_off(void)
799 {
800 	tracer_tracing_off(&global_trace);
801 }
802 EXPORT_SYMBOL_GPL(tracing_off);
803 
804 void disable_trace_on_warning(void)
805 {
806 	if (__disable_trace_on_warning)
807 		tracing_off();
808 }
809 
810 /**
811  * tracer_tracing_is_on - show real state of ring buffer enabled
812  * @tr : the trace array to know if ring buffer is enabled
813  *
814  * Shows real state of the ring buffer if it is enabled or not.
815  */
816 static int tracer_tracing_is_on(struct trace_array *tr)
817 {
818 	if (tr->trace_buffer.buffer)
819 		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
820 	return !tr->buffer_disabled;
821 }
822 
823 /**
824  * tracing_is_on - show state of ring buffers enabled
825  */
826 int tracing_is_on(void)
827 {
828 	return tracer_tracing_is_on(&global_trace);
829 }
830 EXPORT_SYMBOL_GPL(tracing_is_on);
831 
832 static int __init set_buf_size(char *str)
833 {
834 	unsigned long buf_size;
835 
836 	if (!str)
837 		return 0;
838 	buf_size = memparse(str, &str);
839 	/* nr_entries can not be zero */
840 	if (buf_size == 0)
841 		return 0;
842 	trace_buf_size = buf_size;
843 	return 1;
844 }
845 __setup("trace_buf_size=", set_buf_size);
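/*
 * Illustrative usage (not part of the original source): memparse() accepts
 * k/m/g suffixes, so the buffer size can be requested at boot with e.g.
 *
 *	trace_buf_size=4m
 *
 * and is then rounded to page size as noted above.
 */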
846 
847 static int __init set_tracing_thresh(char *str)
848 {
849 	unsigned long threshold;
850 	int ret;
851 
852 	if (!str)
853 		return 0;
854 	ret = kstrtoul(str, 0, &threshold);
855 	if (ret < 0)
856 		return 0;
857 	tracing_thresh = threshold * 1000;
858 	return 1;
859 }
860 __setup("tracing_thresh=", set_tracing_thresh);
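/*
 * Illustrative usage (not part of the original source): the value appears to
 * be taken in microseconds and is stored internally in nanoseconds (the
 * "* 1000" above), e.g.
 *
 *	tracing_thresh=100
 *
 * for a 100 us threshold.
 */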
861 
862 unsigned long nsecs_to_usecs(unsigned long nsecs)
863 {
864 	return nsecs / 1000;
865 }
866 
867 /*
868  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
869  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
870  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
871  * of strings in the order that the enums were defined.
872  */
873 #undef C
874 #define C(a, b) b
875 
876 /* These must match the bit positions in trace_iterator_flags */
877 static const char *trace_options[] = {
878 	TRACE_FLAGS
879 	NULL
880 };
881 
882 static struct {
883 	u64 (*func)(void);
884 	const char *name;
885 	int in_ns;		/* is this clock in nanoseconds? */
886 } trace_clocks[] = {
887 	{ trace_clock_local,		"local",	1 },
888 	{ trace_clock_global,		"global",	1 },
889 	{ trace_clock_counter,		"counter",	0 },
890 	{ trace_clock_jiffies,		"uptime",	0 },
891 	{ trace_clock,			"perf",		1 },
892 	{ ktime_get_mono_fast_ns,	"mono",		1 },
893 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
894 	{ ktime_get_boot_fast_ns,	"boot",		1 },
895 	ARCH_TRACE_CLOCKS
896 };
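/*
 * Illustrative note (not part of the original source): the clock is selected
 * at boot via the trace_clock= option handled by set_trace_boot_clock()
 * above, or at run time through the trace_clock file, e.g.
 *
 *	echo global > /sys/kernel/debug/tracing/trace_clock
 */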
897 
898 /*
899  * trace_parser_get_init - gets the buffer for trace parser
900  */
901 int trace_parser_get_init(struct trace_parser *parser, int size)
902 {
903 	memset(parser, 0, sizeof(*parser));
904 
905 	parser->buffer = kmalloc(size, GFP_KERNEL);
906 	if (!parser->buffer)
907 		return 1;
908 
909 	parser->size = size;
910 	return 0;
911 }
912 
913 /*
914  * trace_parser_put - frees the buffer for trace parser
915  */
916 void trace_parser_put(struct trace_parser *parser)
917 {
918 	kfree(parser->buffer);
919 }
920 
921 /*
922  * trace_get_user - reads the user input string separated by space
923  * (matched by isspace(ch))
924  *
925  * For each string found, the 'struct trace_parser' is updated,
926  * and the function returns.
927  *
928  * Returns number of bytes read.
929  *
930  * See kernel/trace/trace.h for 'struct trace_parser' details.
931  */
932 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
933 	size_t cnt, loff_t *ppos)
934 {
935 	char ch;
936 	size_t read = 0;
937 	ssize_t ret;
938 
939 	if (!*ppos)
940 		trace_parser_clear(parser);
941 
942 	ret = get_user(ch, ubuf++);
943 	if (ret)
944 		goto out;
945 
946 	read++;
947 	cnt--;
948 
949 	/*
950 	 * The parser is not finished with the last write,
951 	 * continue reading the user input without skipping spaces.
952 	 */
953 	if (!parser->cont) {
954 		/* skip white space */
955 		while (cnt && isspace(ch)) {
956 			ret = get_user(ch, ubuf++);
957 			if (ret)
958 				goto out;
959 			read++;
960 			cnt--;
961 		}
962 
963 		/* only spaces were written */
964 		if (isspace(ch)) {
965 			*ppos += read;
966 			ret = read;
967 			goto out;
968 		}
969 
970 		parser->idx = 0;
971 	}
972 
973 	/* read the non-space input */
974 	while (cnt && !isspace(ch)) {
975 		if (parser->idx < parser->size - 1)
976 			parser->buffer[parser->idx++] = ch;
977 		else {
978 			ret = -EINVAL;
979 			goto out;
980 		}
981 		ret = get_user(ch, ubuf++);
982 		if (ret)
983 			goto out;
984 		read++;
985 		cnt--;
986 	}
987 
988 	/* We either got finished input or we have to wait for another call. */
989 	if (isspace(ch)) {
990 		parser->buffer[parser->idx] = 0;
991 		parser->cont = false;
992 	} else if (parser->idx < parser->size - 1) {
993 		parser->cont = true;
994 		parser->buffer[parser->idx++] = ch;
995 	} else {
996 		ret = -EINVAL;
997 		goto out;
998 	}
999 
1000 	*ppos += read;
1001 	ret = read;
1002 
1003 out:
1004 	return ret;
1005 }
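/*
 * Illustrative example (not part of the original source): a write of
 * "do_fork vfs_read\n" to a filter file fills the parser with "do_fork" on
 * one call and "vfs_read" on the next; whitespace ends a token and clears
 * ->cont, while a token cut short by a small count sets ->cont so the next
 * call continues it.
 */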
1006 
1007 /* TODO add a seq_buf_to_buffer() */
1008 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1009 {
1010 	int len;
1011 
1012 	if (trace_seq_used(s) <= s->seq.readpos)
1013 		return -EBUSY;
1014 
1015 	len = trace_seq_used(s) - s->seq.readpos;
1016 	if (cnt > len)
1017 		cnt = len;
1018 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1019 
1020 	s->seq.readpos += cnt;
1021 	return cnt;
1022 }
1023 
1024 unsigned long __read_mostly	tracing_thresh;
1025 
1026 #ifdef CONFIG_TRACER_MAX_TRACE
1027 /*
1028  * Copy the new maximum trace into the separate maximum-trace
1029  * structure. (this way the maximum trace is permanently saved,
1030  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1031  */
1032 static void
1033 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1034 {
1035 	struct trace_buffer *trace_buf = &tr->trace_buffer;
1036 	struct trace_buffer *max_buf = &tr->max_buffer;
1037 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1038 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1039 
1040 	max_buf->cpu = cpu;
1041 	max_buf->time_start = data->preempt_timestamp;
1042 
1043 	max_data->saved_latency = tr->max_latency;
1044 	max_data->critical_start = data->critical_start;
1045 	max_data->critical_end = data->critical_end;
1046 
1047 	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1048 	max_data->pid = tsk->pid;
1049 	/*
1050 	 * If tsk == current, then use current_uid(), as that does not use
1051 	 * RCU. The irq tracer can be called out of RCU scope.
1052 	 */
1053 	if (tsk == current)
1054 		max_data->uid = current_uid();
1055 	else
1056 		max_data->uid = task_uid(tsk);
1057 
1058 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1059 	max_data->policy = tsk->policy;
1060 	max_data->rt_priority = tsk->rt_priority;
1061 
1062 	/* record this task's comm */
1063 	tracing_record_cmdline(tsk);
1064 }
1065 
1066 /**
1067  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1068  * @tr: tracer
1069  * @tsk: the task with the latency
1070  * @cpu: The cpu that initiated the trace.
1071  *
1072  * Flip the buffers between the @tr and the max_tr and record information
1073  * about which task was the cause of this latency.
1074  */
1075 void
1076 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1077 {
1078 	struct ring_buffer *buf;
1079 
1080 	if (tr->stop_count)
1081 		return;
1082 
1083 	WARN_ON_ONCE(!irqs_disabled());
1084 
1085 	if (!tr->allocated_snapshot) {
1086 		/* Only the nop tracer should hit this when disabling */
1087 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1088 		return;
1089 	}
1090 
1091 	arch_spin_lock(&tr->max_lock);
1092 
1093 	/* Inherit the recordable setting from trace_buffer */
1094 	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1095 		ring_buffer_record_on(tr->max_buffer.buffer);
1096 	else
1097 		ring_buffer_record_off(tr->max_buffer.buffer);
1098 
1099 	buf = tr->trace_buffer.buffer;
1100 	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1101 	tr->max_buffer.buffer = buf;
1102 
1103 	__update_max_tr(tr, tsk, cpu);
1104 	arch_spin_unlock(&tr->max_lock);
1105 }
1106 
1107 /**
1108  * update_max_tr_single - only copy one trace over, and reset the rest
1109  * @tr: tracer
1110  * @tsk: task with the latency
1111  * @cpu: the cpu of the buffer to copy.
1112  *
1113  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1114  */
1115 void
1116 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1117 {
1118 	int ret;
1119 
1120 	if (tr->stop_count)
1121 		return;
1122 
1123 	WARN_ON_ONCE(!irqs_disabled());
1124 	if (!tr->allocated_snapshot) {
1125 		/* Only the nop tracer should hit this when disabling */
1126 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1127 		return;
1128 	}
1129 
1130 	arch_spin_lock(&tr->max_lock);
1131 
1132 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1133 
1134 	if (ret == -EBUSY) {
1135 		/*
1136 		 * We failed to swap the buffer due to a commit taking
1137 		 * place on this CPU. We fail to record, but we reset
1138 		 * the max trace buffer (no one writes directly to it)
1139 		 * and flag that it failed.
1140 		 */
1141 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1142 			"Failed to swap buffers due to commit in progress\n");
1143 	}
1144 
1145 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1146 
1147 	__update_max_tr(tr, tsk, cpu);
1148 	arch_spin_unlock(&tr->max_lock);
1149 }
1150 #endif /* CONFIG_TRACER_MAX_TRACE */
1151 
1152 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1153 {
1154 	/* Iterators are static, they should be filled or empty */
1155 	if (trace_buffer_iter(iter, iter->cpu_file))
1156 		return 0;
1157 
1158 	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1159 				full);
1160 }
1161 
1162 #ifdef CONFIG_FTRACE_STARTUP_TEST
1163 static int run_tracer_selftest(struct tracer *type)
1164 {
1165 	struct trace_array *tr = &global_trace;
1166 	struct tracer *saved_tracer = tr->current_trace;
1167 	int ret;
1168 
1169 	if (!type->selftest || tracing_selftest_disabled)
1170 		return 0;
1171 
1172 	/*
1173 	 * Run a selftest on this tracer.
1174 	 * Here we reset the trace buffer, and set the current
1175 	 * tracer to be this tracer. The tracer can then run some
1176 	 * internal tracing to verify that everything is in order.
1177 	 * If we fail, we do not register this tracer.
1178 	 */
1179 	tracing_reset_online_cpus(&tr->trace_buffer);
1180 
1181 	tr->current_trace = type;
1182 
1183 #ifdef CONFIG_TRACER_MAX_TRACE
1184 	if (type->use_max_tr) {
1185 		/* If we expanded the buffers, make sure the max is expanded too */
1186 		if (ring_buffer_expanded)
1187 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1188 					   RING_BUFFER_ALL_CPUS);
1189 		tr->allocated_snapshot = true;
1190 	}
1191 #endif
1192 
1193 	/* the test is responsible for initializing and enabling */
1194 	pr_info("Testing tracer %s: ", type->name);
1195 	ret = type->selftest(type, tr);
1196 	/* the test is responsible for resetting too */
1197 	tr->current_trace = saved_tracer;
1198 	if (ret) {
1199 		printk(KERN_CONT "FAILED!\n");
1200 		/* Add the warning after printing 'FAILED' */
1201 		WARN_ON(1);
1202 		return -1;
1203 	}
1204 	/* Only reset on passing, to avoid touching corrupted buffers */
1205 	tracing_reset_online_cpus(&tr->trace_buffer);
1206 
1207 #ifdef CONFIG_TRACER_MAX_TRACE
1208 	if (type->use_max_tr) {
1209 		tr->allocated_snapshot = false;
1210 
1211 		/* Shrink the max buffer again */
1212 		if (ring_buffer_expanded)
1213 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1214 					   RING_BUFFER_ALL_CPUS);
1215 	}
1216 #endif
1217 
1218 	printk(KERN_CONT "PASSED\n");
1219 	return 0;
1220 }
1221 #else
1222 static inline int run_tracer_selftest(struct tracer *type)
1223 {
1224 	return 0;
1225 }
1226 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1227 
1228 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1229 
1230 static void __init apply_trace_boot_options(void);
1231 
1232 /**
1233  * register_tracer - register a tracer with the ftrace system.
1234  * @type: the plugin for the tracer
1235  *
1236  * Register a new plugin tracer.
1237  */
1238 int __init register_tracer(struct tracer *type)
1239 {
1240 	struct tracer *t;
1241 	int ret = 0;
1242 
1243 	if (!type->name) {
1244 		pr_info("Tracer must have a name\n");
1245 		return -1;
1246 	}
1247 
1248 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1249 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1250 		return -1;
1251 	}
1252 
1253 	mutex_lock(&trace_types_lock);
1254 
1255 	tracing_selftest_running = true;
1256 
1257 	for (t = trace_types; t; t = t->next) {
1258 		if (strcmp(type->name, t->name) == 0) {
1259 			/* already found */
1260 			pr_info("Tracer %s already registered\n",
1261 				type->name);
1262 			ret = -1;
1263 			goto out;
1264 		}
1265 	}
1266 
1267 	if (!type->set_flag)
1268 		type->set_flag = &dummy_set_flag;
1269 	if (!type->flags)
1270 		type->flags = &dummy_tracer_flags;
1271 	else
1272 		if (!type->flags->opts)
1273 			type->flags->opts = dummy_tracer_opt;
1274 
1275 	ret = run_tracer_selftest(type);
1276 	if (ret < 0)
1277 		goto out;
1278 
1279 	type->next = trace_types;
1280 	trace_types = type;
1281 	add_tracer_options(&global_trace, type);
1282 
1283  out:
1284 	tracing_selftest_running = false;
1285 	mutex_unlock(&trace_types_lock);
1286 
1287 	if (ret || !default_bootup_tracer)
1288 		goto out_unlock;
1289 
1290 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1291 		goto out_unlock;
1292 
1293 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1294 	/* Do we want this tracer to start on bootup? */
1295 	tracing_set_tracer(&global_trace, type->name);
1296 	default_bootup_tracer = NULL;
1297 
1298 	apply_trace_boot_options();
1299 
1300 	/* disable other selftests, since this will break them. */
1301 	tracing_selftest_disabled = true;
1302 #ifdef CONFIG_FTRACE_STARTUP_TEST
1303 	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1304 	       type->name);
1305 #endif
1306 
1307  out_unlock:
1308 	return ret;
1309 }
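/*
 * Illustrative sketch (not part of the original source): a minimal tracer
 * registration might look like the following, called from an __init
 * function since register_tracer() is __init here; example_trace_init() and
 * the initcall level are hypothetical.
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_trace_init,
 *	};
 *
 *	static int __init init_example_trace(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_trace);
 */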
1310 
1311 void tracing_reset(struct trace_buffer *buf, int cpu)
1312 {
1313 	struct ring_buffer *buffer = buf->buffer;
1314 
1315 	if (!buffer)
1316 		return;
1317 
1318 	ring_buffer_record_disable(buffer);
1319 
1320 	/* Make sure all commits have finished */
1321 	synchronize_sched();
1322 	ring_buffer_reset_cpu(buffer, cpu);
1323 
1324 	ring_buffer_record_enable(buffer);
1325 }
1326 
1327 void tracing_reset_online_cpus(struct trace_buffer *buf)
1328 {
1329 	struct ring_buffer *buffer = buf->buffer;
1330 	int cpu;
1331 
1332 	if (!buffer)
1333 		return;
1334 
1335 	ring_buffer_record_disable(buffer);
1336 
1337 	/* Make sure all commits have finished */
1338 	synchronize_sched();
1339 
1340 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1341 
1342 	for_each_online_cpu(cpu)
1343 		ring_buffer_reset_cpu(buffer, cpu);
1344 
1345 	ring_buffer_record_enable(buffer);
1346 }
1347 
1348 /* Must have trace_types_lock held */
1349 void tracing_reset_all_online_cpus(void)
1350 {
1351 	struct trace_array *tr;
1352 
1353 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1354 		tracing_reset_online_cpus(&tr->trace_buffer);
1355 #ifdef CONFIG_TRACER_MAX_TRACE
1356 		tracing_reset_online_cpus(&tr->max_buffer);
1357 #endif
1358 	}
1359 }
1360 
1361 #define SAVED_CMDLINES_DEFAULT 128
1362 #define NO_CMDLINE_MAP UINT_MAX
1363 static unsigned saved_tgids[SAVED_CMDLINES_DEFAULT];
1364 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1365 struct saved_cmdlines_buffer {
1366 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1367 	unsigned *map_cmdline_to_pid;
1368 	unsigned cmdline_num;
1369 	int cmdline_idx;
1370 	char *saved_cmdlines;
1371 };
1372 static struct saved_cmdlines_buffer *savedcmd;
1373 
1374 static inline char *get_saved_cmdlines(int idx)
1375 {
1376 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1377 }
1378 
1379 static inline void set_cmdline(int idx, const char *cmdline)
1380 {
1381 	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1382 }
1383 
1384 static int allocate_cmdlines_buffer(unsigned int val,
1385 				    struct saved_cmdlines_buffer *s)
1386 {
1387 	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1388 					GFP_KERNEL);
1389 	if (!s->map_cmdline_to_pid)
1390 		return -ENOMEM;
1391 
1392 	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1393 	if (!s->saved_cmdlines) {
1394 		kfree(s->map_cmdline_to_pid);
1395 		return -ENOMEM;
1396 	}
1397 
1398 	s->cmdline_idx = 0;
1399 	s->cmdline_num = val;
1400 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1401 	       sizeof(s->map_pid_to_cmdline));
1402 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1403 	       val * sizeof(*s->map_cmdline_to_pid));
1404 
1405 	return 0;
1406 }
1407 
1408 static int trace_create_savedcmd(void)
1409 {
1410 	int ret;
1411 
1412 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1413 	if (!savedcmd)
1414 		return -ENOMEM;
1415 
1416 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1417 	if (ret < 0) {
1418 		kfree(savedcmd);
1419 		savedcmd = NULL;
1420 		return -ENOMEM;
1421 	}
1422 
1423 	return 0;
1424 }
1425 
1426 int is_tracing_stopped(void)
1427 {
1428 	return global_trace.stop_count;
1429 }
1430 
1431 /**
1432  * tracing_start - quick start of the tracer
1433  *
1434  * If tracing is enabled but was stopped by tracing_stop,
1435  * this will start the tracer back up.
1436  */
1437 void tracing_start(void)
1438 {
1439 	struct ring_buffer *buffer;
1440 	unsigned long flags;
1441 
1442 	if (tracing_disabled)
1443 		return;
1444 
1445 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1446 	if (--global_trace.stop_count) {
1447 		if (global_trace.stop_count < 0) {
1448 			/* Someone screwed up their debugging */
1449 			WARN_ON_ONCE(1);
1450 			global_trace.stop_count = 0;
1451 		}
1452 		goto out;
1453 	}
1454 
1455 	/* Prevent the buffers from switching */
1456 	arch_spin_lock(&global_trace.max_lock);
1457 
1458 	buffer = global_trace.trace_buffer.buffer;
1459 	if (buffer)
1460 		ring_buffer_record_enable(buffer);
1461 
1462 #ifdef CONFIG_TRACER_MAX_TRACE
1463 	buffer = global_trace.max_buffer.buffer;
1464 	if (buffer)
1465 		ring_buffer_record_enable(buffer);
1466 #endif
1467 
1468 	arch_spin_unlock(&global_trace.max_lock);
1469 
1470  out:
1471 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1472 }
1473 
1474 static void tracing_start_tr(struct trace_array *tr)
1475 {
1476 	struct ring_buffer *buffer;
1477 	unsigned long flags;
1478 
1479 	if (tracing_disabled)
1480 		return;
1481 
1482 	/* If global, we need to also start the max tracer */
1483 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1484 		return tracing_start();
1485 
1486 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1487 
1488 	if (--tr->stop_count) {
1489 		if (tr->stop_count < 0) {
1490 			/* Someone screwed up their debugging */
1491 			WARN_ON_ONCE(1);
1492 			tr->stop_count = 0;
1493 		}
1494 		goto out;
1495 	}
1496 
1497 	buffer = tr->trace_buffer.buffer;
1498 	if (buffer)
1499 		ring_buffer_record_enable(buffer);
1500 
1501  out:
1502 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1503 }
1504 
1505 /**
1506  * tracing_stop - quick stop of the tracer
1507  *
1508  * Lightweight way to stop tracing. Use in conjunction with
1509  * tracing_start.
1510  */
1511 void tracing_stop(void)
1512 {
1513 	struct ring_buffer *buffer;
1514 	unsigned long flags;
1515 
1516 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1517 	if (global_trace.stop_count++)
1518 		goto out;
1519 
1520 	/* Prevent the buffers from switching */
1521 	arch_spin_lock(&global_trace.max_lock);
1522 
1523 	buffer = global_trace.trace_buffer.buffer;
1524 	if (buffer)
1525 		ring_buffer_record_disable(buffer);
1526 
1527 #ifdef CONFIG_TRACER_MAX_TRACE
1528 	buffer = global_trace.max_buffer.buffer;
1529 	if (buffer)
1530 		ring_buffer_record_disable(buffer);
1531 #endif
1532 
1533 	arch_spin_unlock(&global_trace.max_lock);
1534 
1535  out:
1536 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1537 }
1538 
1539 static void tracing_stop_tr(struct trace_array *tr)
1540 {
1541 	struct ring_buffer *buffer;
1542 	unsigned long flags;
1543 
1544 	/* If global, we need to also stop the max tracer */
1545 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1546 		return tracing_stop();
1547 
1548 	raw_spin_lock_irqsave(&tr->start_lock, flags);
1549 	if (tr->stop_count++)
1550 		goto out;
1551 
1552 	buffer = tr->trace_buffer.buffer;
1553 	if (buffer)
1554 		ring_buffer_record_disable(buffer);
1555 
1556  out:
1557 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1558 }
1559 
1560 void trace_stop_cmdline_recording(void);
1561 
1562 static int trace_save_cmdline(struct task_struct *tsk)
1563 {
1564 	unsigned tpid, idx;
1565 
1566 	/* treat recording of idle task as a success */
1567 	if (!tsk->pid)
1568 		return 1;
1569 
1570 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
1571 
1572 	/*
1573 	 * It's not the end of the world if we don't get
1574 	 * the lock, but we also don't want to spin
1575 	 * nor do we want to disable interrupts,
1576 	 * so if we miss here, then better luck next time.
1577 	 */
1578 	if (!arch_spin_trylock(&trace_cmdline_lock))
1579 		return 0;
1580 
1581 	idx = savedcmd->map_pid_to_cmdline[tpid];
1582 	if (idx == NO_CMDLINE_MAP) {
1583 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1584 
1585 		savedcmd->map_pid_to_cmdline[tpid] = idx;
1586 		savedcmd->cmdline_idx = idx;
1587 	}
1588 
1589 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1590 	set_cmdline(idx, tsk->comm);
1591 	saved_tgids[idx] = tsk->tgid;
1592 	arch_spin_unlock(&trace_cmdline_lock);
1593 
1594 	return 1;
1595 }
1596 
1597 static void __trace_find_cmdline(int pid, char comm[])
1598 {
1599 	unsigned map;
1600 	int tpid;
1601 
1602 	if (!pid) {
1603 		strcpy(comm, "<idle>");
1604 		return;
1605 	}
1606 
1607 	if (WARN_ON_ONCE(pid < 0)) {
1608 		strcpy(comm, "<XXX>");
1609 		return;
1610 	}
1611 
1612 	tpid = pid & (PID_MAX_DEFAULT - 1);
1613 	map = savedcmd->map_pid_to_cmdline[tpid];
1614 	if (map != NO_CMDLINE_MAP) {
1615 		tpid = savedcmd->map_cmdline_to_pid[map];
1616 		if (tpid == pid) {
1617 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1618 			return;
1619 		}
1620 	}
1621 	strcpy(comm, "<...>");
1622 }
1623 
1624 void trace_find_cmdline(int pid, char comm[])
1625 {
1626 	preempt_disable();
1627 	arch_spin_lock(&trace_cmdline_lock);
1628 
1629 	__trace_find_cmdline(pid, comm);
1630 
1631 	arch_spin_unlock(&trace_cmdline_lock);
1632 	preempt_enable();
1633 }
1634 
1635 int trace_find_tgid(int pid)
1636 {
1637 	unsigned map;
1638 	int tgid;
1639 
1640 	preempt_disable();
1641 	arch_spin_lock(&trace_cmdline_lock);
1642 	map = savedcmd->map_pid_to_cmdline[pid];
1643 	if (map != NO_CMDLINE_MAP)
1644 		tgid = saved_tgids[map];
1645 	else
1646 		tgid = -1;
1647 
1648 	arch_spin_unlock(&trace_cmdline_lock);
1649 	preempt_enable();
1650 
1651 	return tgid;
1652 }
1653 
1654 void tracing_record_cmdline(struct task_struct *tsk)
1655 {
1656 	if (!__this_cpu_read(trace_cmdline_save))
1657 		return;
1658 
1659 	if (trace_save_cmdline(tsk))
1660 		__this_cpu_write(trace_cmdline_save, false);
1661 }
1662 
1663 void
1664 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1665 			     int pc)
1666 {
1667 	struct task_struct *tsk = current;
1668 
1669 	entry->preempt_count		= pc & 0xff;
1670 	entry->pid			= (tsk) ? tsk->pid : 0;
1671 	entry->flags =
1672 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1673 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1674 #else
1675 		TRACE_FLAG_IRQS_NOSUPPORT |
1676 #endif
1677 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1678 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
1679 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1680 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1681 }
1682 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1683 
1684 struct ring_buffer_event *
1685 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1686 			  int type,
1687 			  unsigned long len,
1688 			  unsigned long flags, int pc)
1689 {
1690 	struct ring_buffer_event *event;
1691 
1692 	event = ring_buffer_lock_reserve(buffer, len);
1693 	if (event != NULL) {
1694 		struct trace_entry *ent = ring_buffer_event_data(event);
1695 
1696 		tracing_generic_entry_update(ent, flags, pc);
1697 		ent->type = type;
1698 	}
1699 
1700 	return event;
1701 }
1702 
1703 void
1704 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1705 {
1706 	__this_cpu_write(trace_cmdline_save, true);
1707 	ring_buffer_unlock_commit(buffer, event);
1708 }
1709 
1710 void trace_buffer_unlock_commit(struct trace_array *tr,
1711 				struct ring_buffer *buffer,
1712 				struct ring_buffer_event *event,
1713 				unsigned long flags, int pc)
1714 {
1715 	__buffer_unlock_commit(buffer, event);
1716 
1717 	ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
1718 	ftrace_trace_userstack(tr, buffer, flags, pc);
1719 }
1720 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1721 
1722 static struct ring_buffer *temp_buffer;
1723 
1724 struct ring_buffer_event *
1725 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1726 			  struct trace_event_file *trace_file,
1727 			  int type, unsigned long len,
1728 			  unsigned long flags, int pc)
1729 {
1730 	struct ring_buffer_event *entry;
1731 
1732 	*current_rb = trace_file->tr->trace_buffer.buffer;
1733 	entry = trace_buffer_lock_reserve(*current_rb,
1734 					 type, len, flags, pc);
1735 	/*
1736 	 * If tracing is off, but we have triggers enabled
1737 	 * we still need to look at the event data. Use the temp_buffer
1738 	 * to store the trace event for the trigger to use. It's recursion
1739 	 * safe and will not be recorded anywhere.
1740 	 */
1741 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
1742 		*current_rb = temp_buffer;
1743 		entry = trace_buffer_lock_reserve(*current_rb,
1744 						  type, len, flags, pc);
1745 	}
1746 	return entry;
1747 }
1748 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1749 
1750 struct ring_buffer_event *
1751 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1752 				  int type, unsigned long len,
1753 				  unsigned long flags, int pc)
1754 {
1755 	*current_rb = global_trace.trace_buffer.buffer;
1756 	return trace_buffer_lock_reserve(*current_rb,
1757 					 type, len, flags, pc);
1758 }
1759 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1760 
1761 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
1762 				     struct ring_buffer *buffer,
1763 				     struct ring_buffer_event *event,
1764 				     unsigned long flags, int pc,
1765 				     struct pt_regs *regs)
1766 {
1767 	__buffer_unlock_commit(buffer, event);
1768 
1769 	/*
1770 	 * If regs is not set, then skip the following callers:
1771 	 *   trace_buffer_unlock_commit_regs
1772 	 *   event_trigger_unlock_commit
1773 	 *   trace_event_buffer_commit
1774 	 *   trace_event_raw_event_sched_switch
1775 	 * Note, we can still get here via blktrace, wakeup tracer
1776 	 * and mmiotrace, but that's ok if they lose a function or
1777 	 * two. They are not that meaningful.
1778 	 */
1779 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
1780 	ftrace_trace_userstack(tr, buffer, flags, pc);
1781 }
1782 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1783 
1784 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1785 					 struct ring_buffer_event *event)
1786 {
1787 	ring_buffer_discard_commit(buffer, event);
1788 }
1789 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1790 
1791 void
1792 trace_function(struct trace_array *tr,
1793 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
1794 	       int pc)
1795 {
1796 	struct trace_event_call *call = &event_function;
1797 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
1798 	struct ring_buffer_event *event;
1799 	struct ftrace_entry *entry;
1800 
1801 	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1802 					  flags, pc);
1803 	if (!event)
1804 		return;
1805 	entry	= ring_buffer_event_data(event);
1806 	entry->ip			= ip;
1807 	entry->parent_ip		= parent_ip;
1808 
1809 	if (!call_filter_check_discard(call, entry, buffer, event))
1810 		__buffer_unlock_commit(buffer, event);
1811 }
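/*
 * Illustrative sketch (an assumption, not code taken from this file): a
 * caller such as a function tracer callback captures the current irq
 * flags and preempt count before recording the two instruction pointers:
 *
 *	unsigned long flags;
 *	int pc = preempt_count();
 *
 *	local_save_flags(flags);
 *	trace_function(tr, ip, parent_ip, flags, pc);
 */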
1812 
1813 #ifdef CONFIG_STACKTRACE
1814 
1815 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1816 struct ftrace_stack {
1817 	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
1818 };
1819 
1820 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1821 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1822 
1823 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1824 				 unsigned long flags,
1825 				 int skip, int pc, struct pt_regs *regs)
1826 {
1827 	struct trace_event_call *call = &event_kernel_stack;
1828 	struct ring_buffer_event *event;
1829 	struct stack_entry *entry;
1830 	struct stack_trace trace;
1831 	int use_stack;
1832 	int size = FTRACE_STACK_ENTRIES;
1833 
1834 	trace.nr_entries	= 0;
1835 	trace.skip		= skip;
1836 
1837 	/*
1838 	 * Add two, for this function and the call to save_stack_trace().
1839 	 * If regs is set, then these functions will not be in the way.
1840 	 */
1841 	if (!regs)
1842 		trace.skip += 2;
1843 
1844 	/*
1845 	 * Since events can happen in NMIs there's no safe way to
1846 	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1847 	 * or NMI comes in, it will just have to use the default
1848 	 * FTRACE_STACK_ENTRIES.
1849 	 */
1850 	preempt_disable_notrace();
1851 
1852 	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1853 	/*
1854 	 * We don't need any atomic variables, just a barrier.
1855 	 * If an interrupt comes in, we don't care, because it would
1856 	 * have exited and put the counter back to what we want.
1857 	 * We just need a barrier to keep gcc from moving things
1858 	 * around.
1859 	 */
1860 	barrier();
1861 	if (use_stack == 1) {
1862 		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
1863 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
1864 
1865 		if (regs)
1866 			save_stack_trace_regs(regs, &trace);
1867 		else
1868 			save_stack_trace(&trace);
1869 
1870 		if (trace.nr_entries > size)
1871 			size = trace.nr_entries;
1872 	} else
1873 		/* From now on, use_stack is a boolean */
1874 		use_stack = 0;
1875 
1876 	size *= sizeof(unsigned long);
1877 
1878 	event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1879 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
1880 				    flags, pc);
1881 	if (!event)
1882 		goto out;
1883 	entry = ring_buffer_event_data(event);
1884 
1885 	memset(&entry->caller, 0, size);
1886 
1887 	if (use_stack)
1888 		memcpy(&entry->caller, trace.entries,
1889 		       trace.nr_entries * sizeof(unsigned long));
1890 	else {
1891 		trace.max_entries	= FTRACE_STACK_ENTRIES;
1892 		trace.entries		= entry->caller;
1893 		if (regs)
1894 			save_stack_trace_regs(regs, &trace);
1895 		else
1896 			save_stack_trace(&trace);
1897 	}
1898 
1899 	entry->size = trace.nr_entries;
1900 
1901 	if (!call_filter_check_discard(call, entry, buffer, event))
1902 		__buffer_unlock_commit(buffer, event);
1903 
1904  out:
1905 	/* Again, don't let gcc optimize things here */
1906 	barrier();
1907 	__this_cpu_dec(ftrace_stack_reserve);
1908 	preempt_enable_notrace();
1909 
1910 }
1911 
1912 static inline void ftrace_trace_stack(struct trace_array *tr,
1913 				      struct ring_buffer *buffer,
1914 				      unsigned long flags,
1915 				      int skip, int pc, struct pt_regs *regs)
1916 {
1917 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
1918 		return;
1919 
1920 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
1921 }
1922 
1923 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1924 		   int pc)
1925 {
1926 	__ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1927 }
1928 
1929 /**
1930  * trace_dump_stack - record a stack back trace in the trace buffer
1931  * @skip: Number of functions to skip (helper handlers)
1932  */
1933 void trace_dump_stack(int skip)
1934 {
1935 	unsigned long flags;
1936 
1937 	if (tracing_disabled || tracing_selftest_running)
1938 		return;
1939 
1940 	local_save_flags(flags);
1941 
1942 	/*
1943 	 * Skip 3 more; that seems to get us to the caller of
1944 	 * this function.
1945 	 */
1946 	skip += 3;
1947 	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
1948 			     flags, skip, preempt_count(), NULL);
1949 }
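/*
 * Example (illustrative): any kernel code that wants the current call
 * chain recorded in the trace buffer can simply do
 *
 *	trace_dump_stack(0);
 *
 * A non-zero @skip drops that many additional callers from the top of
 * the recorded stack.
 */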
1950 
1951 static DEFINE_PER_CPU(int, user_stack_count);
1952 
1953 void
1954 ftrace_trace_userstack(struct trace_array *tr,
1955 		       struct ring_buffer *buffer, unsigned long flags, int pc)
1956 {
1957 	struct trace_event_call *call = &event_user_stack;
1958 	struct ring_buffer_event *event;
1959 	struct userstack_entry *entry;
1960 	struct stack_trace trace;
1961 
1962 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
1963 		return;
1964 
1965 	/*
1966 	 * NMIs cannot handle page faults, even with fixups.
1967 	 * Saving the user stack can (and often does) fault.
1968 	 */
1969 	if (unlikely(in_nmi()))
1970 		return;
1971 
1972 	/*
1973 	 * prevent recursion, since the user stack tracing may
1974 	 * trigger other kernel events.
1975 	 */
1976 	preempt_disable();
1977 	if (__this_cpu_read(user_stack_count))
1978 		goto out;
1979 
1980 	__this_cpu_inc(user_stack_count);
1981 
1982 	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1983 					  sizeof(*entry), flags, pc);
1984 	if (!event)
1985 		goto out_drop_count;
1986 	entry	= ring_buffer_event_data(event);
1987 
1988 	entry->tgid		= current->tgid;
1989 	memset(&entry->caller, 0, sizeof(entry->caller));
1990 
1991 	trace.nr_entries	= 0;
1992 	trace.max_entries	= FTRACE_STACK_ENTRIES;
1993 	trace.skip		= 0;
1994 	trace.entries		= entry->caller;
1995 
1996 	save_stack_trace_user(&trace);
1997 	if (!call_filter_check_discard(call, entry, buffer, event))
1998 		__buffer_unlock_commit(buffer, event);
1999 
2000  out_drop_count:
2001 	__this_cpu_dec(user_stack_count);
2002  out:
2003 	preempt_enable();
2004 }
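/*
 * Example (illustrative) from user space: user-space stack traces are
 * only recorded when the "userstacktrace" option is set, e.g.:
 *
 *	# echo userstacktrace > /sys/kernel/tracing/trace_options
 */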
2005 
2006 #ifdef UNUSED
2007 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2008 {
2009 	ftrace_trace_userstack(tr, tr->trace_buffer.buffer, flags, preempt_count());
2010 }
2011 #endif /* UNUSED */
2012 
2013 #endif /* CONFIG_STACKTRACE */
2014 
2015 /* created for use with alloc_percpu */
2016 struct trace_buffer_struct {
2017 	char buffer[TRACE_BUF_SIZE];
2018 };
2019 
2020 static struct trace_buffer_struct *trace_percpu_buffer;
2021 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
2022 static struct trace_buffer_struct *trace_percpu_irq_buffer;
2023 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
2024 
2025 /*
2026  * The buffer used is dependent on the context. There is a per cpu
2027  * buffer for normal context, softirq context, hard irq context and
2028  * for NMI context. This allows for lockless recording.
2029  *
2030  * Note, if the buffers failed to be allocated, then this returns NULL
2031  */
2032 static char *get_trace_buf(void)
2033 {
2034 	struct trace_buffer_struct *percpu_buffer;
2035 
2036 	/*
2037 	 * If we have allocated per cpu buffers, then we do not
2038 	 * need to do any locking.
2039 	 */
2040 	if (in_nmi())
2041 		percpu_buffer = trace_percpu_nmi_buffer;
2042 	else if (in_irq())
2043 		percpu_buffer = trace_percpu_irq_buffer;
2044 	else if (in_softirq())
2045 		percpu_buffer = trace_percpu_sirq_buffer;
2046 	else
2047 		percpu_buffer = trace_percpu_buffer;
2048 
2049 	if (!percpu_buffer)
2050 		return NULL;
2051 
2052 	return this_cpu_ptr(&percpu_buffer->buffer[0]);
2053 }
2054 
2055 static int alloc_percpu_trace_buffer(void)
2056 {
2057 	struct trace_buffer_struct *buffers;
2058 	struct trace_buffer_struct *sirq_buffers;
2059 	struct trace_buffer_struct *irq_buffers;
2060 	struct trace_buffer_struct *nmi_buffers;
2061 
2062 	buffers = alloc_percpu(struct trace_buffer_struct);
2063 	if (!buffers)
2064 		goto err_warn;
2065 
2066 	sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2067 	if (!sirq_buffers)
2068 		goto err_sirq;
2069 
2070 	irq_buffers = alloc_percpu(struct trace_buffer_struct);
2071 	if (!irq_buffers)
2072 		goto err_irq;
2073 
2074 	nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2075 	if (!nmi_buffers)
2076 		goto err_nmi;
2077 
2078 	trace_percpu_buffer = buffers;
2079 	trace_percpu_sirq_buffer = sirq_buffers;
2080 	trace_percpu_irq_buffer = irq_buffers;
2081 	trace_percpu_nmi_buffer = nmi_buffers;
2082 
2083 	return 0;
2084 
2085  err_nmi:
2086 	free_percpu(irq_buffers);
2087  err_irq:
2088 	free_percpu(sirq_buffers);
2089  err_sirq:
2090 	free_percpu(buffers);
2091  err_warn:
2092 	WARN(1, "Could not allocate percpu trace_printk buffer");
2093 	return -ENOMEM;
2094 }
2095 
2096 static int buffers_allocated;
2097 
2098 void trace_printk_init_buffers(void)
2099 {
2100 	if (buffers_allocated)
2101 		return;
2102 
2103 	if (alloc_percpu_trace_buffer())
2104 		return;
2105 
2106 	/* trace_printk() is for debug use only. Don't use it in production. */
2107 
2108 	pr_warning("\n");
2109 	pr_warning("**********************************************************\n");
2110 	pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2111 	pr_warning("**                                                      **\n");
2112 	pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
2113 	pr_warning("**                                                      **\n");
2114 	pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
2115 	pr_warning("** unsafe for production use.                           **\n");
2116 	pr_warning("**                                                      **\n");
2117 	pr_warning("** If you see this message and you are not debugging    **\n");
2118 	pr_warning("** the kernel, report this immediately to your vendor!  **\n");
2119 	pr_warning("**                                                      **\n");
2120 	pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2121 	pr_warning("**********************************************************\n");
2122 
2123 	/* Expand the buffers to set size */
2124 	tracing_update_buffers();
2125 
2126 	buffers_allocated = 1;
2127 
2128 	/*
2129 	 * trace_printk_init_buffers() can be called by modules.
2130 	 * If that happens, then we need to start cmdline recording
2131 	 * directly here. If the global_trace.buffer is already
2132 	 * allocated here, then this was called by module code.
2133 	 */
2134 	if (global_trace.trace_buffer.buffer)
2135 		tracing_start_cmdline_record();
2136 }
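/*
 * Example (illustrative): trace_printk() is used like printk(), but the
 * output goes into the ring buffer rather than the console:
 *
 *	trace_printk("processing %s: count=%d\n", name, count);
 *
 * Any such user is what causes the buffers above to be allocated and the
 * warning banner to be printed.
 */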
2137 
2138 void trace_printk_start_comm(void)
2139 {
2140 	/* Start tracing comms if trace printk is set */
2141 	if (!buffers_allocated)
2142 		return;
2143 	tracing_start_cmdline_record();
2144 }
2145 
2146 static void trace_printk_start_stop_comm(int enabled)
2147 {
2148 	if (!buffers_allocated)
2149 		return;
2150 
2151 	if (enabled)
2152 		tracing_start_cmdline_record();
2153 	else
2154 		tracing_stop_cmdline_record();
2155 }
2156 
2157 /**
2158  * trace_vbprintk - write a binary message to the tracing buffer
2159  * @ip: caller address; @fmt: printf-style format; @args: arguments for @fmt
2160  */
2161 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2162 {
2163 	struct trace_event_call *call = &event_bprint;
2164 	struct ring_buffer_event *event;
2165 	struct ring_buffer *buffer;
2166 	struct trace_array *tr = &global_trace;
2167 	struct bprint_entry *entry;
2168 	unsigned long flags;
2169 	char *tbuffer;
2170 	int len = 0, size, pc;
2171 
2172 	if (unlikely(tracing_selftest_running || tracing_disabled))
2173 		return 0;
2174 
2175 	/* Don't pollute graph traces with trace_vprintk internals */
2176 	pause_graph_tracing();
2177 
2178 	pc = preempt_count();
2179 	preempt_disable_notrace();
2180 
2181 	tbuffer = get_trace_buf();
2182 	if (!tbuffer) {
2183 		len = 0;
2184 		goto out;
2185 	}
2186 
2187 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2188 
2189 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2190 		goto out;
2191 
2192 	local_save_flags(flags);
2193 	size = sizeof(*entry) + sizeof(u32) * len;
2194 	buffer = tr->trace_buffer.buffer;
2195 	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2196 					  flags, pc);
2197 	if (!event)
2198 		goto out;
2199 	entry = ring_buffer_event_data(event);
2200 	entry->ip			= ip;
2201 	entry->fmt			= fmt;
2202 
2203 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2204 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2205 		__buffer_unlock_commit(buffer, event);
2206 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2207 	}
2208 
2209 out:
2210 	preempt_enable_notrace();
2211 	unpause_graph_tracing();
2212 
2213 	return len;
2214 }
2215 EXPORT_SYMBOL_GPL(trace_vbprintk);
2216 
2217 __printf(3, 0)
2218 static int
2219 __trace_array_vprintk(struct ring_buffer *buffer,
2220 		      unsigned long ip, const char *fmt, va_list args)
2221 {
2222 	struct trace_event_call *call = &event_print;
2223 	struct ring_buffer_event *event;
2224 	int len = 0, size, pc;
2225 	struct print_entry *entry;
2226 	unsigned long flags;
2227 	char *tbuffer;
2228 
2229 	if (tracing_disabled || tracing_selftest_running)
2230 		return 0;
2231 
2232 	/* Don't pollute graph traces with trace_vprintk internals */
2233 	pause_graph_tracing();
2234 
2235 	pc = preempt_count();
2236 	preempt_disable_notrace();
2237 
2238 
2239 	tbuffer = get_trace_buf();
2240 	if (!tbuffer) {
2241 		len = 0;
2242 		goto out;
2243 	}
2244 
2245 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2246 
2247 	local_save_flags(flags);
2248 	size = sizeof(*entry) + len + 1;
2249 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2250 					  flags, pc);
2251 	if (!event)
2252 		goto out;
2253 	entry = ring_buffer_event_data(event);
2254 	entry->ip = ip;
2255 
2256 	memcpy(&entry->buf, tbuffer, len + 1);
2257 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2258 		__buffer_unlock_commit(buffer, event);
2259 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2260 	}
2261  out:
2262 	preempt_enable_notrace();
2263 	unpause_graph_tracing();
2264 
2265 	return len;
2266 }
2267 
2268 __printf(3, 0)
2269 int trace_array_vprintk(struct trace_array *tr,
2270 			unsigned long ip, const char *fmt, va_list args)
2271 {
2272 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2273 }
2274 
2275 __printf(3, 0)
2276 int trace_array_printk(struct trace_array *tr,
2277 		       unsigned long ip, const char *fmt, ...)
2278 {
2279 	int ret;
2280 	va_list ap;
2281 
2282 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2283 		return 0;
2284 
2285 	if (!tr)
2286 		return -ENOENT;
2287 
2288 	va_start(ap, fmt);
2289 	ret = trace_array_vprintk(tr, ip, fmt, ap);
2290 	va_end(ap);
2291 	return ret;
2292 }
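/*
 * Example (illustrative): writing into a specific trace instance rather
 * than the global buffer; _THIS_IP_ records the caller's address as @ip:
 *
 *	trace_array_printk(tr, _THIS_IP_, "reset took %lu us\n", delta);
 */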
2293 
2294 __printf(3, 4)
2295 int trace_array_printk_buf(struct ring_buffer *buffer,
2296 			   unsigned long ip, const char *fmt, ...)
2297 {
2298 	int ret;
2299 	va_list ap;
2300 
2301 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2302 		return 0;
2303 
2304 	va_start(ap, fmt);
2305 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2306 	va_end(ap);
2307 	return ret;
2308 }
2309 
2310 __printf(2, 0)
2311 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2312 {
2313 	return trace_array_vprintk(&global_trace, ip, fmt, args);
2314 }
2315 EXPORT_SYMBOL_GPL(trace_vprintk);
2316 
2317 static void trace_iterator_increment(struct trace_iterator *iter)
2318 {
2319 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2320 
2321 	iter->idx++;
2322 	if (buf_iter)
2323 		ring_buffer_read(buf_iter, NULL);
2324 }
2325 
2326 static struct trace_entry *
2327 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2328 		unsigned long *lost_events)
2329 {
2330 	struct ring_buffer_event *event;
2331 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2332 
2333 	if (buf_iter)
2334 		event = ring_buffer_iter_peek(buf_iter, ts);
2335 	else
2336 		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2337 					 lost_events);
2338 
2339 	if (event) {
2340 		iter->ent_size = ring_buffer_event_length(event);
2341 		return ring_buffer_event_data(event);
2342 	}
2343 	iter->ent_size = 0;
2344 	return NULL;
2345 }
2346 
2347 static struct trace_entry *
2348 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2349 		  unsigned long *missing_events, u64 *ent_ts)
2350 {
2351 	struct ring_buffer *buffer = iter->trace_buffer->buffer;
2352 	struct trace_entry *ent, *next = NULL;
2353 	unsigned long lost_events = 0, next_lost = 0;
2354 	int cpu_file = iter->cpu_file;
2355 	u64 next_ts = 0, ts;
2356 	int next_cpu = -1;
2357 	int next_size = 0;
2358 	int cpu;
2359 
2360 	/*
2361 	 * If we are in a per_cpu trace file, don't bother iterating over
2362 	 * all cpus; just peek at that cpu directly.
2363 	 */
2364 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
2365 		if (ring_buffer_empty_cpu(buffer, cpu_file))
2366 			return NULL;
2367 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2368 		if (ent_cpu)
2369 			*ent_cpu = cpu_file;
2370 
2371 		return ent;
2372 	}
2373 
2374 	for_each_tracing_cpu(cpu) {
2375 
2376 		if (ring_buffer_empty_cpu(buffer, cpu))
2377 			continue;
2378 
2379 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2380 
2381 		/*
2382 		 * Pick the entry with the smallest timestamp:
2383 		 */
2384 		if (ent && (!next || ts < next_ts)) {
2385 			next = ent;
2386 			next_cpu = cpu;
2387 			next_ts = ts;
2388 			next_lost = lost_events;
2389 			next_size = iter->ent_size;
2390 		}
2391 	}
2392 
2393 	iter->ent_size = next_size;
2394 
2395 	if (ent_cpu)
2396 		*ent_cpu = next_cpu;
2397 
2398 	if (ent_ts)
2399 		*ent_ts = next_ts;
2400 
2401 	if (missing_events)
2402 		*missing_events = next_lost;
2403 
2404 	return next;
2405 }
2406 
2407 /* Find the next real entry, without updating the iterator itself */
2408 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2409 					  int *ent_cpu, u64 *ent_ts)
2410 {
2411 	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2412 }
2413 
2414 /* Find the next real entry, and increment the iterator to the next entry */
2415 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2416 {
2417 	iter->ent = __find_next_entry(iter, &iter->cpu,
2418 				      &iter->lost_events, &iter->ts);
2419 
2420 	if (iter->ent)
2421 		trace_iterator_increment(iter);
2422 
2423 	return iter->ent ? iter : NULL;
2424 }
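/*
 * Illustrative sketch of how readers walk the merged stream: loop on
 * trace_find_next_entry_inc(), which keeps returning the iterator while
 * entries remain (picked across CPUs in timestamp order) and NULL once
 * every per-cpu buffer is exhausted:
 *
 *	while (trace_find_next_entry_inc(iter))
 *		print_trace_line(iter);
 */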
2425 
2426 static void trace_consume(struct trace_iterator *iter)
2427 {
2428 	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2429 			    &iter->lost_events);
2430 }
2431 
2432 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2433 {
2434 	struct trace_iterator *iter = m->private;
2435 	int i = (int)*pos;
2436 	void *ent;
2437 
2438 	WARN_ON_ONCE(iter->leftover);
2439 
2440 	(*pos)++;
2441 
2442 	/* can't go backwards */
2443 	if (iter->idx > i)
2444 		return NULL;
2445 
2446 	if (iter->idx < 0)
2447 		ent = trace_find_next_entry_inc(iter);
2448 	else
2449 		ent = iter;
2450 
2451 	while (ent && iter->idx < i)
2452 		ent = trace_find_next_entry_inc(iter);
2453 
2454 	iter->pos = *pos;
2455 
2456 	return ent;
2457 }
2458 
2459 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2460 {
2461 	struct ring_buffer_event *event;
2462 	struct ring_buffer_iter *buf_iter;
2463 	unsigned long entries = 0;
2464 	u64 ts;
2465 
2466 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2467 
2468 	buf_iter = trace_buffer_iter(iter, cpu);
2469 	if (!buf_iter)
2470 		return;
2471 
2472 	ring_buffer_iter_reset(buf_iter);
2473 
2474 	/*
2475 	 * We could have the case with the max latency tracers
2476 	 * that a reset never took place on a cpu. This is evident
2477 	 * by the timestamp being before the start of the buffer.
2478 	 */
2479 	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2480 		if (ts >= iter->trace_buffer->time_start)
2481 			break;
2482 		entries++;
2483 		ring_buffer_read(buf_iter, NULL);
2484 	}
2485 
2486 	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2487 }
2488 
2489 /*
2490  * The current tracer is copied to avoid a global locking
2491  * The current tracer is copied to avoid taking a global lock
2492  */
2493 static void *s_start(struct seq_file *m, loff_t *pos)
2494 {
2495 	struct trace_iterator *iter = m->private;
2496 	struct trace_array *tr = iter->tr;
2497 	int cpu_file = iter->cpu_file;
2498 	void *p = NULL;
2499 	loff_t l = 0;
2500 	int cpu;
2501 
2502 	/*
2503 	 * copy the tracer to avoid using a global lock all around.
2504 	 * iter->trace is a copy of current_trace, the pointer to the
2505 	 * name may be used instead of a strcmp(), as iter->trace->name
2506 	 * will point to the same string as current_trace->name.
2507 	 */
2508 	mutex_lock(&trace_types_lock);
2509 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2510 		*iter->trace = *tr->current_trace;
2511 	mutex_unlock(&trace_types_lock);
2512 
2513 #ifdef CONFIG_TRACER_MAX_TRACE
2514 	if (iter->snapshot && iter->trace->use_max_tr)
2515 		return ERR_PTR(-EBUSY);
2516 #endif
2517 
2518 	if (*pos != iter->pos) {
2519 		iter->ent = NULL;
2520 		iter->cpu = 0;
2521 		iter->idx = -1;
2522 
2523 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
2524 			for_each_tracing_cpu(cpu)
2525 				tracing_iter_reset(iter, cpu);
2526 		} else
2527 			tracing_iter_reset(iter, cpu_file);
2528 
2529 		iter->leftover = 0;
2530 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2531 			;
2532 
2533 	} else {
2534 		/*
2535 		 * If we overflowed the seq_file before, then we want
2536 		 * to just reuse the trace_seq buffer again.
2537 		 */
2538 		if (iter->leftover)
2539 			p = iter;
2540 		else {
2541 			l = *pos - 1;
2542 			p = s_next(m, p, &l);
2543 		}
2544 	}
2545 
2546 	trace_event_read_lock();
2547 	trace_access_lock(cpu_file);
2548 	return p;
2549 }
2550 
2551 static void s_stop(struct seq_file *m, void *p)
2552 {
2553 	struct trace_iterator *iter = m->private;
2554 
2555 #ifdef CONFIG_TRACER_MAX_TRACE
2556 	if (iter->snapshot && iter->trace->use_max_tr)
2557 		return;
2558 #endif
2559 
2560 	trace_access_unlock(iter->cpu_file);
2561 	trace_event_read_unlock();
2562 }
2563 
2564 static void
2565 get_total_entries(struct trace_buffer *buf,
2566 		  unsigned long *total, unsigned long *entries)
2567 {
2568 	unsigned long count;
2569 	int cpu;
2570 
2571 	*total = 0;
2572 	*entries = 0;
2573 
2574 	for_each_tracing_cpu(cpu) {
2575 		count = ring_buffer_entries_cpu(buf->buffer, cpu);
2576 		/*
2577 		 * If this buffer has skipped entries, then we hold all
2578 		 * entries for the trace and we need to ignore the
2579 		 * ones before the time stamp.
2580 		 */
2581 		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2582 			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2583 			/* total is the same as the entries */
2584 			*total += count;
2585 		} else
2586 			*total += count +
2587 				ring_buffer_overrun_cpu(buf->buffer, cpu);
2588 		*entries += count;
2589 	}
2590 }
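/*
 * Worked example (illustrative): if a cpu buffer currently holds 950
 * readable entries and the ring buffer reports 50 overruns (entries that
 * were overwritten when the buffer wrapped), then *entries accumulates
 * 950 while *total accumulates 1000, the number of entries written
 * overall.
 */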
2591 
2592 static void print_lat_help_header(struct seq_file *m)
2593 {
2594 	seq_puts(m, "#                  _------=> CPU#            \n"
2595 		    "#                 / _-----=> irqs-off        \n"
2596 		    "#                | / _----=> need-resched    \n"
2597 		    "#                || / _---=> hardirq/softirq \n"
2598 		    "#                ||| / _--=> preempt-depth   \n"
2599 		    "#                |||| /     delay            \n"
2600 		    "#  cmd     pid   ||||| time  |   caller      \n"
2601 		    "#     \\   /      |||||  \\    |   /         \n");
2602 }
2603 
2604 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2605 {
2606 	unsigned long total;
2607 	unsigned long entries;
2608 
2609 	get_total_entries(buf, &total, &entries);
2610 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2611 		   entries, total, num_online_cpus());
2612 	seq_puts(m, "#\n");
2613 }
2614 
2615 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2616 {
2617 	print_event_info(buf, m);
2618 	seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2619 		    "#              | |       |          |         |\n");
2620 }
2621 
2622 static void print_func_help_header_tgid(struct trace_buffer *buf, struct seq_file *m)
2623 {
2624 	print_event_info(buf, m);
2625 	seq_puts(m, "#           TASK-PID    TGID   CPU#      TIMESTAMP  FUNCTION\n");
2626 	seq_puts(m, "#              | |        |      |          |         |\n");
2627 }
2628 
2629 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2630 {
2631 	print_event_info(buf, m);
2632 	seq_puts(m, "#                              _-----=> irqs-off\n"
2633 		    "#                             / _----=> need-resched\n"
2634 		    "#                            | / _---=> hardirq/softirq\n"
2635 		    "#                            || / _--=> preempt-depth\n"
2636 		    "#                            ||| /     delay\n"
2637 		    "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2638 		    "#              | |       |   ||||       |         |\n");
2639 }
2640 
2641 static void print_func_help_header_irq_tgid(struct trace_buffer *buf, struct seq_file *m)
2642 {
2643 	print_event_info(buf, m);
2644 	seq_puts(m, "#                                      _-----=> irqs-off\n");
2645 	seq_puts(m, "#                                     / _----=> need-resched\n");
2646 	seq_puts(m, "#                                    | / _---=> hardirq/softirq\n");
2647 	seq_puts(m, "#                                    || / _--=> preempt-depth\n");
2648 	seq_puts(m, "#                                    ||| /     delay\n");
2649 	seq_puts(m, "#           TASK-PID    TGID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2650 	seq_puts(m, "#              | |        |      |   ||||       |         |\n");
2651 }
2652 
2653 void
2654 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2655 {
2656 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2657 	struct trace_buffer *buf = iter->trace_buffer;
2658 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2659 	struct tracer *type = iter->trace;
2660 	unsigned long entries;
2661 	unsigned long total;
2662 	const char *name = "preemption";
2663 
2664 	name = type->name;
2665 
2666 	get_total_entries(buf, &total, &entries);
2667 
2668 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2669 		   name, UTS_RELEASE);
2670 	seq_puts(m, "# -----------------------------------"
2671 		 "---------------------------------\n");
2672 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2673 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2674 		   nsecs_to_usecs(data->saved_latency),
2675 		   entries,
2676 		   total,
2677 		   buf->cpu,
2678 #if defined(CONFIG_PREEMPT_NONE)
2679 		   "server",
2680 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2681 		   "desktop",
2682 #elif defined(CONFIG_PREEMPT)
2683 		   "preempt",
2684 #else
2685 		   "unknown",
2686 #endif
2687 		   /* These are reserved for later use */
2688 		   0, 0, 0, 0);
2689 #ifdef CONFIG_SMP
2690 	seq_printf(m, " #P:%d)\n", num_online_cpus());
2691 #else
2692 	seq_puts(m, ")\n");
2693 #endif
2694 	seq_puts(m, "#    -----------------\n");
2695 	seq_printf(m, "#    | task: %.16s-%d "
2696 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2697 		   data->comm, data->pid,
2698 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2699 		   data->policy, data->rt_priority);
2700 	seq_puts(m, "#    -----------------\n");
2701 
2702 	if (data->critical_start) {
2703 		seq_puts(m, "#  => started at: ");
2704 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2705 		trace_print_seq(m, &iter->seq);
2706 		seq_puts(m, "\n#  => ended at:   ");
2707 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2708 		trace_print_seq(m, &iter->seq);
2709 		seq_puts(m, "\n#\n");
2710 	}
2711 
2712 	seq_puts(m, "#\n");
2713 }
2714 
2715 static void test_cpu_buff_start(struct trace_iterator *iter)
2716 {
2717 	struct trace_seq *s = &iter->seq;
2718 	struct trace_array *tr = iter->tr;
2719 
2720 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
2721 		return;
2722 
2723 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2724 		return;
2725 
2726 	if (cpumask_available(iter->started) &&
2727 	    cpumask_test_cpu(iter->cpu, iter->started))
2728 		return;
2729 
2730 	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2731 		return;
2732 
2733 	if (cpumask_available(iter->started))
2734 		cpumask_set_cpu(iter->cpu, iter->started);
2735 
2736 	/* Don't print started cpu buffer for the first entry of the trace */
2737 	if (iter->idx > 1)
2738 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2739 				iter->cpu);
2740 }
2741 
2742 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2743 {
2744 	struct trace_array *tr = iter->tr;
2745 	struct trace_seq *s = &iter->seq;
2746 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
2747 	struct trace_entry *entry;
2748 	struct trace_event *event;
2749 
2750 	entry = iter->ent;
2751 
2752 	test_cpu_buff_start(iter);
2753 
2754 	event = ftrace_find_event(entry->type);
2755 
2756 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2757 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2758 			trace_print_lat_context(iter);
2759 		else
2760 			trace_print_context(iter);
2761 	}
2762 
2763 	if (trace_seq_has_overflowed(s))
2764 		return TRACE_TYPE_PARTIAL_LINE;
2765 
2766 	if (event)
2767 		return event->funcs->trace(iter, sym_flags, event);
2768 
2769 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
2770 
2771 	return trace_handle_return(s);
2772 }
2773 
2774 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2775 {
2776 	struct trace_array *tr = iter->tr;
2777 	struct trace_seq *s = &iter->seq;
2778 	struct trace_entry *entry;
2779 	struct trace_event *event;
2780 
2781 	entry = iter->ent;
2782 
2783 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
2784 		trace_seq_printf(s, "%d %d %llu ",
2785 				 entry->pid, iter->cpu, iter->ts);
2786 
2787 	if (trace_seq_has_overflowed(s))
2788 		return TRACE_TYPE_PARTIAL_LINE;
2789 
2790 	event = ftrace_find_event(entry->type);
2791 	if (event)
2792 		return event->funcs->raw(iter, 0, event);
2793 
2794 	trace_seq_printf(s, "%d ?\n", entry->type);
2795 
2796 	return trace_handle_return(s);
2797 }
2798 
2799 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2800 {
2801 	struct trace_array *tr = iter->tr;
2802 	struct trace_seq *s = &iter->seq;
2803 	unsigned char newline = '\n';
2804 	struct trace_entry *entry;
2805 	struct trace_event *event;
2806 
2807 	entry = iter->ent;
2808 
2809 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2810 		SEQ_PUT_HEX_FIELD(s, entry->pid);
2811 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
2812 		SEQ_PUT_HEX_FIELD(s, iter->ts);
2813 		if (trace_seq_has_overflowed(s))
2814 			return TRACE_TYPE_PARTIAL_LINE;
2815 	}
2816 
2817 	event = ftrace_find_event(entry->type);
2818 	if (event) {
2819 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
2820 		if (ret != TRACE_TYPE_HANDLED)
2821 			return ret;
2822 	}
2823 
2824 	SEQ_PUT_FIELD(s, newline);
2825 
2826 	return trace_handle_return(s);
2827 }
2828 
2829 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2830 {
2831 	struct trace_array *tr = iter->tr;
2832 	struct trace_seq *s = &iter->seq;
2833 	struct trace_entry *entry;
2834 	struct trace_event *event;
2835 
2836 	entry = iter->ent;
2837 
2838 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2839 		SEQ_PUT_FIELD(s, entry->pid);
2840 		SEQ_PUT_FIELD(s, iter->cpu);
2841 		SEQ_PUT_FIELD(s, iter->ts);
2842 		if (trace_seq_has_overflowed(s))
2843 			return TRACE_TYPE_PARTIAL_LINE;
2844 	}
2845 
2846 	event = ftrace_find_event(entry->type);
2847 	return event ? event->funcs->binary(iter, 0, event) :
2848 		TRACE_TYPE_HANDLED;
2849 }
2850 
2851 int trace_empty(struct trace_iterator *iter)
2852 {
2853 	struct ring_buffer_iter *buf_iter;
2854 	int cpu;
2855 
2856 	/* If we are looking at one CPU buffer, only check that one */
2857 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2858 		cpu = iter->cpu_file;
2859 		buf_iter = trace_buffer_iter(iter, cpu);
2860 		if (buf_iter) {
2861 			if (!ring_buffer_iter_empty(buf_iter))
2862 				return 0;
2863 		} else {
2864 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2865 				return 0;
2866 		}
2867 		return 1;
2868 	}
2869 
2870 	for_each_tracing_cpu(cpu) {
2871 		buf_iter = trace_buffer_iter(iter, cpu);
2872 		if (buf_iter) {
2873 			if (!ring_buffer_iter_empty(buf_iter))
2874 				return 0;
2875 		} else {
2876 			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2877 				return 0;
2878 		}
2879 	}
2880 
2881 	return 1;
2882 }
2883 
2884 /*  Called with trace_event_read_lock() held. */
2885 enum print_line_t print_trace_line(struct trace_iterator *iter)
2886 {
2887 	struct trace_array *tr = iter->tr;
2888 	unsigned long trace_flags = tr->trace_flags;
2889 	enum print_line_t ret;
2890 
2891 	if (iter->lost_events) {
2892 		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2893 				 iter->cpu, iter->lost_events);
2894 		if (trace_seq_has_overflowed(&iter->seq))
2895 			return TRACE_TYPE_PARTIAL_LINE;
2896 	}
2897 
2898 	if (iter->trace && iter->trace->print_line) {
2899 		ret = iter->trace->print_line(iter);
2900 		if (ret != TRACE_TYPE_UNHANDLED)
2901 			return ret;
2902 	}
2903 
2904 	if (iter->ent->type == TRACE_BPUTS &&
2905 			trace_flags & TRACE_ITER_PRINTK &&
2906 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2907 		return trace_print_bputs_msg_only(iter);
2908 
2909 	if (iter->ent->type == TRACE_BPRINT &&
2910 			trace_flags & TRACE_ITER_PRINTK &&
2911 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2912 		return trace_print_bprintk_msg_only(iter);
2913 
2914 	if (iter->ent->type == TRACE_PRINT &&
2915 			trace_flags & TRACE_ITER_PRINTK &&
2916 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2917 		return trace_print_printk_msg_only(iter);
2918 
2919 	if (trace_flags & TRACE_ITER_BIN)
2920 		return print_bin_fmt(iter);
2921 
2922 	if (trace_flags & TRACE_ITER_HEX)
2923 		return print_hex_fmt(iter);
2924 
2925 	if (trace_flags & TRACE_ITER_RAW)
2926 		return print_raw_fmt(iter);
2927 
2928 	return print_trace_fmt(iter);
2929 }
2930 
2931 void trace_latency_header(struct seq_file *m)
2932 {
2933 	struct trace_iterator *iter = m->private;
2934 	struct trace_array *tr = iter->tr;
2935 
2936 	/* print nothing if the buffers are empty */
2937 	if (trace_empty(iter))
2938 		return;
2939 
2940 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2941 		print_trace_header(m, iter);
2942 
2943 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
2944 		print_lat_help_header(m);
2945 }
2946 
2947 void trace_default_header(struct seq_file *m)
2948 {
2949 	struct trace_iterator *iter = m->private;
2950 	struct trace_array *tr = iter->tr;
2951 	unsigned long trace_flags = tr->trace_flags;
2952 
2953 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2954 		return;
2955 
2956 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2957 		/* print nothing if the buffers are empty */
2958 		if (trace_empty(iter))
2959 			return;
2960 		print_trace_header(m, iter);
2961 		if (!(trace_flags & TRACE_ITER_VERBOSE))
2962 			print_lat_help_header(m);
2963 	} else {
2964 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2965 			if (trace_flags & TRACE_ITER_IRQ_INFO)
2966 				if (trace_flags & TRACE_ITER_TGID)
2967 					print_func_help_header_irq_tgid(iter->trace_buffer, m);
2968 				else
2969 					print_func_help_header_irq(iter->trace_buffer, m);
2970 			else
2971 				if (trace_flags & TRACE_ITER_TGID)
2972 					print_func_help_header_tgid(iter->trace_buffer, m);
2973 				else
2974 					print_func_help_header(iter->trace_buffer, m);
2975 		}
2976 	}
2977 }
2978 
2979 static void test_ftrace_alive(struct seq_file *m)
2980 {
2981 	if (!ftrace_is_dead())
2982 		return;
2983 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
2984 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
2985 }
2986 
2987 #ifdef CONFIG_TRACER_MAX_TRACE
2988 static void show_snapshot_main_help(struct seq_file *m)
2989 {
2990 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
2991 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2992 		    "#                      Takes a snapshot of the main buffer.\n"
2993 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
2994 		    "#                      (Doesn't have to be '2' works with any number that\n"
2995 		    "#                       is not a '0' or '1')\n");
2996 }
2997 
2998 static void show_snapshot_percpu_help(struct seq_file *m)
2999 {
3000 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3001 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3002 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3003 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
3004 #else
3005 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3006 		    "#                     Must use main snapshot file to allocate.\n");
3007 #endif
3008 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3009 		    "#                      (Doesn't have to be '2' works with any number that\n"
3010 		    "#                       is not a '0' or '1')\n");
3011 }
3012 
3013 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3014 {
3015 	if (iter->tr->allocated_snapshot)
3016 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3017 	else
3018 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3019 
3020 	seq_puts(m, "# Snapshot commands:\n");
3021 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3022 		show_snapshot_main_help(m);
3023 	else
3024 		show_snapshot_percpu_help(m);
3025 }
3026 #else
3027 /* Should never be called */
3028 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3029 #endif
3030 
3031 static int s_show(struct seq_file *m, void *v)
3032 {
3033 	struct trace_iterator *iter = v;
3034 	int ret;
3035 
3036 	if (iter->ent == NULL) {
3037 		if (iter->tr) {
3038 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
3039 			seq_puts(m, "#\n");
3040 			test_ftrace_alive(m);
3041 		}
3042 		if (iter->snapshot && trace_empty(iter))
3043 			print_snapshot_help(m, iter);
3044 		else if (iter->trace && iter->trace->print_header)
3045 			iter->trace->print_header(m);
3046 		else
3047 			trace_default_header(m);
3048 
3049 	} else if (iter->leftover) {
3050 		/*
3051 		 * If we filled the seq_file buffer earlier, we
3052 		 * want to just show it now.
3053 		 */
3054 		ret = trace_print_seq(m, &iter->seq);
3055 
3056 		/* ret should this time be zero, but you never know */
3057 		iter->leftover = ret;
3058 
3059 	} else {
3060 		print_trace_line(iter);
3061 		ret = trace_print_seq(m, &iter->seq);
3062 		/*
3063 		 * If we overflow the seq_file buffer, then it will
3064 		 * ask us for this data again at start up.
3065 		 * Use that instead.
3066 		 *  ret is 0 if seq_file write succeeded.
3067 		 *        -1 otherwise.
3068 		 */
3069 		iter->leftover = ret;
3070 	}
3071 
3072 	return 0;
3073 }
3074 
3075 /*
3076  * Should be used after trace_array_get(), trace_types_lock
3077  * ensures that i_cdev was already initialized.
3078  */
3079 static inline int tracing_get_cpu(struct inode *inode)
3080 {
3081 	if (inode->i_cdev) /* See trace_create_cpu_file() */
3082 		return (long)inode->i_cdev - 1;
3083 	return RING_BUFFER_ALL_CPUS;
3084 }
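/*
 * Note: the per-cpu trace files store "cpu + 1" in i_cdev when they are
 * created (see trace_create_cpu_file()), so a NULL i_cdev decodes to
 * RING_BUFFER_ALL_CPUS here and anything else maps back to a cpu number.
 */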
3085 
3086 static const struct seq_operations tracer_seq_ops = {
3087 	.start		= s_start,
3088 	.next		= s_next,
3089 	.stop		= s_stop,
3090 	.show		= s_show,
3091 };
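/*
 * These callbacks follow the standard seq_file protocol: the seq_file
 * core calls s_start() with the requested position, then alternates
 * s_show() and s_next() until s_next() returns NULL or the output buffer
 * fills, and finally calls s_stop(). Roughly (sketch only; the real loop
 * lives in fs/seq_file.c):
 *
 *	p = s_start(m, &pos);
 *	while (p) {
 *		s_show(m, p);
 *		p = s_next(m, p, &pos);
 *	}
 *	s_stop(m, p);
 */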
3092 
3093 static struct trace_iterator *
3094 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3095 {
3096 	struct trace_array *tr = inode->i_private;
3097 	struct trace_iterator *iter;
3098 	int cpu;
3099 
3100 	if (tracing_disabled)
3101 		return ERR_PTR(-ENODEV);
3102 
3103 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3104 	if (!iter)
3105 		return ERR_PTR(-ENOMEM);
3106 
3107 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3108 				    GFP_KERNEL);
3109 	if (!iter->buffer_iter)
3110 		goto release;
3111 
3112 	/*
3113 	 * We make a copy of the current tracer to avoid concurrent
3114 	 * changes on it while we are reading.
3115 	 */
3116 	mutex_lock(&trace_types_lock);
3117 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3118 	if (!iter->trace)
3119 		goto fail;
3120 
3121 	*iter->trace = *tr->current_trace;
3122 
3123 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3124 		goto fail;
3125 
3126 	iter->tr = tr;
3127 
3128 #ifdef CONFIG_TRACER_MAX_TRACE
3129 	/* Currently only the top directory has a snapshot */
3130 	if (tr->current_trace->print_max || snapshot)
3131 		iter->trace_buffer = &tr->max_buffer;
3132 	else
3133 #endif
3134 		iter->trace_buffer = &tr->trace_buffer;
3135 	iter->snapshot = snapshot;
3136 	iter->pos = -1;
3137 	iter->cpu_file = tracing_get_cpu(inode);
3138 	mutex_init(&iter->mutex);
3139 
3140 	/* Notify the tracer early; before we stop tracing. */
3141 	if (iter->trace && iter->trace->open)
3142 		iter->trace->open(iter);
3143 
3144 	/* Annotate start of buffers if we had overruns */
3145 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3146 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3147 
3148 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3149 	if (trace_clocks[tr->clock_id].in_ns)
3150 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3151 
3152 	/* stop the trace while dumping if we are not opening "snapshot" */
3153 	if (!iter->snapshot)
3154 		tracing_stop_tr(tr);
3155 
3156 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3157 		for_each_tracing_cpu(cpu) {
3158 			iter->buffer_iter[cpu] =
3159 				ring_buffer_read_prepare(iter->trace_buffer->buffer,
3160 							 cpu, GFP_KERNEL);
3161 		}
3162 		ring_buffer_read_prepare_sync();
3163 		for_each_tracing_cpu(cpu) {
3164 			ring_buffer_read_start(iter->buffer_iter[cpu]);
3165 			tracing_iter_reset(iter, cpu);
3166 		}
3167 	} else {
3168 		cpu = iter->cpu_file;
3169 		iter->buffer_iter[cpu] =
3170 			ring_buffer_read_prepare(iter->trace_buffer->buffer,
3171 						 cpu, GFP_KERNEL);
3172 		ring_buffer_read_prepare_sync();
3173 		ring_buffer_read_start(iter->buffer_iter[cpu]);
3174 		tracing_iter_reset(iter, cpu);
3175 	}
3176 
3177 	mutex_unlock(&trace_types_lock);
3178 
3179 	return iter;
3180 
3181  fail:
3182 	mutex_unlock(&trace_types_lock);
3183 	kfree(iter->trace);
3184 	kfree(iter->buffer_iter);
3185 release:
3186 	seq_release_private(inode, file);
3187 	return ERR_PTR(-ENOMEM);
3188 }
3189 
3190 int tracing_open_generic(struct inode *inode, struct file *filp)
3191 {
3192 	if (tracing_disabled)
3193 		return -ENODEV;
3194 
3195 	filp->private_data = inode->i_private;
3196 	return 0;
3197 }
3198 
3199 bool tracing_is_disabled(void)
3200 {
3201 	return tracing_disabled ? true : false;
3202 }
3203 
3204 /*
3205  * Open and update trace_array ref count.
3206  * Must have the current trace_array passed to it.
3207  */
3208 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3209 {
3210 	struct trace_array *tr = inode->i_private;
3211 
3212 	if (tracing_disabled)
3213 		return -ENODEV;
3214 
3215 	if (trace_array_get(tr) < 0)
3216 		return -ENODEV;
3217 
3218 	filp->private_data = inode->i_private;
3219 
3220 	return 0;
3221 }
3222 
3223 static int tracing_release(struct inode *inode, struct file *file)
3224 {
3225 	struct trace_array *tr = inode->i_private;
3226 	struct seq_file *m = file->private_data;
3227 	struct trace_iterator *iter;
3228 	int cpu;
3229 
3230 	if (!(file->f_mode & FMODE_READ)) {
3231 		trace_array_put(tr);
3232 		return 0;
3233 	}
3234 
3235 	/* Writes do not use seq_file */
3236 	iter = m->private;
3237 	mutex_lock(&trace_types_lock);
3238 
3239 	for_each_tracing_cpu(cpu) {
3240 		if (iter->buffer_iter[cpu])
3241 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3242 	}
3243 
3244 	if (iter->trace && iter->trace->close)
3245 		iter->trace->close(iter);
3246 
3247 	if (!iter->snapshot)
3248 		/* reenable tracing if it was previously enabled */
3249 		tracing_start_tr(tr);
3250 
3251 	__trace_array_put(tr);
3252 
3253 	mutex_unlock(&trace_types_lock);
3254 
3255 	mutex_destroy(&iter->mutex);
3256 	free_cpumask_var(iter->started);
3257 	kfree(iter->trace);
3258 	kfree(iter->buffer_iter);
3259 	seq_release_private(inode, file);
3260 
3261 	return 0;
3262 }
3263 
3264 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3265 {
3266 	struct trace_array *tr = inode->i_private;
3267 
3268 	trace_array_put(tr);
3269 	return 0;
3270 }
3271 
3272 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3273 {
3274 	struct trace_array *tr = inode->i_private;
3275 
3276 	trace_array_put(tr);
3277 
3278 	return single_release(inode, file);
3279 }
3280 
3281 static int tracing_open(struct inode *inode, struct file *file)
3282 {
3283 	struct trace_array *tr = inode->i_private;
3284 	struct trace_iterator *iter;
3285 	int ret = 0;
3286 
3287 	if (trace_array_get(tr) < 0)
3288 		return -ENODEV;
3289 
3290 	/* If this file was open for write, then erase contents */
3291 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3292 		int cpu = tracing_get_cpu(inode);
3293 		struct trace_buffer *trace_buf = &tr->trace_buffer;
3294 
3295 #ifdef CONFIG_TRACER_MAX_TRACE
3296 		if (tr->current_trace->print_max)
3297 			trace_buf = &tr->max_buffer;
3298 #endif
3299 
3300 		if (cpu == RING_BUFFER_ALL_CPUS)
3301 			tracing_reset_online_cpus(trace_buf);
3302 		else
3303 			tracing_reset(trace_buf, cpu);
3304 	}
3305 
3306 	if (file->f_mode & FMODE_READ) {
3307 		iter = __tracing_open(inode, file, false);
3308 		if (IS_ERR(iter))
3309 			ret = PTR_ERR(iter);
3310 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3311 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
3312 	}
3313 
3314 	if (ret < 0)
3315 		trace_array_put(tr);
3316 
3317 	return ret;
3318 }
3319 
3320 /*
3321  * Some tracers are not suitable for instance buffers.
3322  * A tracer is always available for the global array (toplevel)
3323  * or if it explicitly states that it is.
3324  */
3325 static bool
3326 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3327 {
3328 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3329 }
3330 
3331 /* Find the next tracer that this trace array may use */
3332 static struct tracer *
3333 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3334 {
3335 	while (t && !trace_ok_for_array(t, tr))
3336 		t = t->next;
3337 
3338 	return t;
3339 }
3340 
3341 static void *
3342 t_next(struct seq_file *m, void *v, loff_t *pos)
3343 {
3344 	struct trace_array *tr = m->private;
3345 	struct tracer *t = v;
3346 
3347 	(*pos)++;
3348 
3349 	if (t)
3350 		t = get_tracer_for_array(tr, t->next);
3351 
3352 	return t;
3353 }
3354 
3355 static void *t_start(struct seq_file *m, loff_t *pos)
3356 {
3357 	struct trace_array *tr = m->private;
3358 	struct tracer *t;
3359 	loff_t l = 0;
3360 
3361 	mutex_lock(&trace_types_lock);
3362 
3363 	t = get_tracer_for_array(tr, trace_types);
3364 	for (; t && l < *pos; t = t_next(m, t, &l))
3365 			;
3366 
3367 	return t;
3368 }
3369 
3370 static void t_stop(struct seq_file *m, void *p)
3371 {
3372 	mutex_unlock(&trace_types_lock);
3373 }
3374 
3375 static int t_show(struct seq_file *m, void *v)
3376 {
3377 	struct tracer *t = v;
3378 
3379 	if (!t)
3380 		return 0;
3381 
3382 	seq_puts(m, t->name);
3383 	if (t->next)
3384 		seq_putc(m, ' ');
3385 	else
3386 		seq_putc(m, '\n');
3387 
3388 	return 0;
3389 }
3390 
3391 static const struct seq_operations show_traces_seq_ops = {
3392 	.start		= t_start,
3393 	.next		= t_next,
3394 	.stop		= t_stop,
3395 	.show		= t_show,
3396 };
3397 
3398 static int show_traces_open(struct inode *inode, struct file *file)
3399 {
3400 	struct trace_array *tr = inode->i_private;
3401 	struct seq_file *m;
3402 	int ret;
3403 
3404 	if (tracing_disabled)
3405 		return -ENODEV;
3406 
3407 	if (trace_array_get(tr) < 0)
3408 		return -ENODEV;
3409 
3410 	ret = seq_open(file, &show_traces_seq_ops);
3411 	if (ret) {
3412 		trace_array_put(tr);
3413 		return ret;
3414 	}
3415 
3416 	m = file->private_data;
3417 	m->private = tr;
3418 
3419 	return 0;
3420 }
3421 
3422 static int show_traces_release(struct inode *inode, struct file *file)
3423 {
3424 	struct trace_array *tr = inode->i_private;
3425 
3426 	trace_array_put(tr);
3427 	return seq_release(inode, file);
3428 }
3429 
3430 static ssize_t
3431 tracing_write_stub(struct file *filp, const char __user *ubuf,
3432 		   size_t count, loff_t *ppos)
3433 {
3434 	return count;
3435 }
3436 
3437 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3438 {
3439 	int ret;
3440 
3441 	if (file->f_mode & FMODE_READ)
3442 		ret = seq_lseek(file, offset, whence);
3443 	else
3444 		file->f_pos = ret = 0;
3445 
3446 	return ret;
3447 }
3448 
3449 static const struct file_operations tracing_fops = {
3450 	.open		= tracing_open,
3451 	.read		= seq_read,
3452 	.write		= tracing_write_stub,
3453 	.llseek		= tracing_lseek,
3454 	.release	= tracing_release,
3455 };
3456 
3457 static const struct file_operations show_traces_fops = {
3458 	.open		= show_traces_open,
3459 	.read		= seq_read,
3460 	.llseek		= seq_lseek,
3461 	.release	= show_traces_release,
3462 };
3463 
3464 static ssize_t
3465 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3466 		     size_t count, loff_t *ppos)
3467 {
3468 	struct trace_array *tr = file_inode(filp)->i_private;
3469 	char *mask_str;
3470 	int len;
3471 
3472 	len = snprintf(NULL, 0, "%*pb\n",
3473 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
3474 	mask_str = kmalloc(len, GFP_KERNEL);
3475 	if (!mask_str)
3476 		return -ENOMEM;
3477 
3478 	len = snprintf(mask_str, len, "%*pb\n",
3479 		       cpumask_pr_args(tr->tracing_cpumask));
3480 	if (len >= count) {
3481 		count = -EINVAL;
3482 		goto out_err;
3483 	}
3484 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
3485 
3486 out_err:
3487 	kfree(mask_str);
3488 
3489 	return count;
3490 }
3491 
3492 static ssize_t
3493 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3494 		      size_t count, loff_t *ppos)
3495 {
3496 	struct trace_array *tr = file_inode(filp)->i_private;
3497 	cpumask_var_t tracing_cpumask_new;
3498 	int err, cpu;
3499 
3500 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3501 		return -ENOMEM;
3502 
3503 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3504 	if (err)
3505 		goto err_unlock;
3506 
3507 	local_irq_disable();
3508 	arch_spin_lock(&tr->max_lock);
3509 	for_each_tracing_cpu(cpu) {
3510 		/*
3511 		 * Increase/decrease the disabled counter if we are
3512 		 * about to flip a bit in the cpumask:
3513 		 */
3514 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3515 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3516 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3517 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3518 		}
3519 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3520 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3521 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3522 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3523 		}
3524 	}
3525 	arch_spin_unlock(&tr->max_lock);
3526 	local_irq_enable();
3527 
3528 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3529 	free_cpumask_var(tracing_cpumask_new);
3530 
3531 	return count;
3532 
3533 err_unlock:
3534 	free_cpumask_var(tracing_cpumask_new);
3535 
3536 	return err;
3537 }
3538 
3539 static const struct file_operations tracing_cpumask_fops = {
3540 	.open		= tracing_open_generic_tr,
3541 	.read		= tracing_cpumask_read,
3542 	.write		= tracing_cpumask_write,
3543 	.release	= tracing_release_generic_tr,
3544 	.llseek		= generic_file_llseek,
3545 };
3546 
3547 static int tracing_trace_options_show(struct seq_file *m, void *v)
3548 {
3549 	struct tracer_opt *trace_opts;
3550 	struct trace_array *tr = m->private;
3551 	u32 tracer_flags;
3552 	int i;
3553 
3554 	mutex_lock(&trace_types_lock);
3555 	tracer_flags = tr->current_trace->flags->val;
3556 	trace_opts = tr->current_trace->flags->opts;
3557 
3558 	for (i = 0; trace_options[i]; i++) {
3559 		if (tr->trace_flags & (1 << i))
3560 			seq_printf(m, "%s\n", trace_options[i]);
3561 		else
3562 			seq_printf(m, "no%s\n", trace_options[i]);
3563 	}
3564 
3565 	for (i = 0; trace_opts[i].name; i++) {
3566 		if (tracer_flags & trace_opts[i].bit)
3567 			seq_printf(m, "%s\n", trace_opts[i].name);
3568 		else
3569 			seq_printf(m, "no%s\n", trace_opts[i].name);
3570 	}
3571 	mutex_unlock(&trace_types_lock);
3572 
3573 	return 0;
3574 }
3575 
3576 static int __set_tracer_option(struct trace_array *tr,
3577 			       struct tracer_flags *tracer_flags,
3578 			       struct tracer_opt *opts, int neg)
3579 {
3580 	struct tracer *trace = tr->current_trace;
3581 	int ret;
3582 
3583 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3584 	if (ret)
3585 		return ret;
3586 
3587 	if (neg)
3588 		tracer_flags->val &= ~opts->bit;
3589 	else
3590 		tracer_flags->val |= opts->bit;
3591 	return 0;
3592 }
3593 
3594 /* Try to assign a tracer specific option */
3595 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3596 {
3597 	struct tracer *trace = tr->current_trace;
3598 	struct tracer_flags *tracer_flags = trace->flags;
3599 	struct tracer_opt *opts = NULL;
3600 	int i;
3601 
3602 	for (i = 0; tracer_flags->opts[i].name; i++) {
3603 		opts = &tracer_flags->opts[i];
3604 
3605 		if (strcmp(cmp, opts->name) == 0)
3606 			return __set_tracer_option(tr, trace->flags, opts, neg);
3607 	}
3608 
3609 	return -EINVAL;
3610 }
3611 
3612 /* Some tracers require overwrite to stay enabled */
3613 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3614 {
3615 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3616 		return -1;
3617 
3618 	return 0;
3619 }
3620 
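/*
 * Set or clear one trace_flags bit.  The current tracer may veto the
 * change via its flag_changed() callback; some flags also carry side
 * effects (cmdline recording, ring buffer overwrite mode, trace_printk).
 */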
3621 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3622 {
3623 	/* do nothing if flag is already set */
3624 	if (!!(tr->trace_flags & mask) == !!enabled)
3625 		return 0;
3626 
3627 	/* Give the tracer a chance to approve the change */
3628 	if (tr->current_trace->flag_changed)
3629 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3630 			return -EINVAL;
3631 
3632 	if (enabled)
3633 		tr->trace_flags |= mask;
3634 	else
3635 		tr->trace_flags &= ~mask;
3636 
3637 	if (mask == TRACE_ITER_RECORD_CMD)
3638 		trace_event_enable_cmd_record(enabled);
3639 
3640 	if (mask == TRACE_ITER_OVERWRITE) {
3641 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3642 #ifdef CONFIG_TRACER_MAX_TRACE
3643 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3644 #endif
3645 	}
3646 
3647 	if (mask == TRACE_ITER_PRINTK) {
3648 		trace_printk_start_stop_comm(enabled);
3649 		trace_printk_control(enabled);
3650 	}
3651 
3652 	return 0;
3653 }
3654 
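/*
 * Apply a single option token, optionally prefixed with "no" to clear it:
 * try the core trace_options first and fall back to the current tracer's
 * private options.
 */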
3655 static int trace_set_options(struct trace_array *tr, char *option)
3656 {
3657 	char *cmp;
3658 	int neg = 0;
3659 	int ret = -ENODEV;
3660 	int i;
3661 	size_t orig_len = strlen(option);
3662 
3663 	cmp = strstrip(option);
3664 
3665 	if (strncmp(cmp, "no", 2) == 0) {
3666 		neg = 1;
3667 		cmp += 2;
3668 	}
3669 
3670 	mutex_lock(&trace_types_lock);
3671 
3672 	for (i = 0; trace_options[i]; i++) {
3673 		if (strcmp(cmp, trace_options[i]) == 0) {
3674 			ret = set_tracer_flag(tr, 1 << i, !neg);
3675 			break;
3676 		}
3677 	}
3678 
3679 	/* If no option could be set, test the specific tracer options */
3680 	if (!trace_options[i])
3681 		ret = set_tracer_option(tr, cmp, neg);
3682 
3683 	mutex_unlock(&trace_types_lock);
3684 
3685 	/*
3686 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
3687 	 * turn it back into a space.
3688 	 */
3689 	if (orig_len > strlen(option))
3690 		option[strlen(option)] = ' ';
3691 
3692 	return ret;
3693 }
3694 
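/*
 * Walk the comma-separated options saved at boot in trace_boot_options_buf
 * and apply each entry to the global trace array.
 */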
3695 static void __init apply_trace_boot_options(void)
3696 {
3697 	char *buf = trace_boot_options_buf;
3698 	char *option;
3699 
3700 	while (true) {
3701 		option = strsep(&buf, ",");
3702 
3703 		if (!option)
3704 			break;
3705 
3706 		if (*option)
3707 			trace_set_options(&global_trace, option);
3708 
3709 		/* Put back the comma to allow this to be called again */
3710 		if (buf)
3711 			*(buf - 1) = ',';
3712 	}
3713 }
3714 
3715 static ssize_t
3716 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3717 			size_t cnt, loff_t *ppos)
3718 {
3719 	struct seq_file *m = filp->private_data;
3720 	struct trace_array *tr = m->private;
3721 	char buf[64];
3722 	int ret;
3723 
3724 	if (cnt >= sizeof(buf))
3725 		return -EINVAL;
3726 
3727 	if (copy_from_user(&buf, ubuf, cnt))
3728 		return -EFAULT;
3729 
3730 	buf[cnt] = 0;
3731 
3732 	ret = trace_set_options(tr, buf);
3733 	if (ret < 0)
3734 		return ret;
3735 
3736 	*ppos += cnt;
3737 
3738 	return cnt;
3739 }
3740 
3741 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3742 {
3743 	struct trace_array *tr = inode->i_private;
3744 	int ret;
3745 
3746 	if (tracing_disabled)
3747 		return -ENODEV;
3748 
3749 	if (trace_array_get(tr) < 0)
3750 		return -ENODEV;
3751 
3752 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
3753 	if (ret < 0)
3754 		trace_array_put(tr);
3755 
3756 	return ret;
3757 }
3758 
3759 static const struct file_operations tracing_iter_fops = {
3760 	.open		= tracing_trace_options_open,
3761 	.read		= seq_read,
3762 	.llseek		= seq_lseek,
3763 	.release	= tracing_single_release_tr,
3764 	.write		= tracing_trace_options_write,
3765 };
3766 
3767 static const char readme_msg[] =
3768 	"tracing mini-HOWTO:\n\n"
3769 	"# echo 0 > tracing_on : quick way to disable tracing\n"
3770 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3771 	" Important files:\n"
3772 	"  trace\t\t\t- The static contents of the buffer\n"
3773 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
3774 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3775 	"  current_tracer\t- function and latency tracers\n"
3776 	"  available_tracers\t- list of configured tracers for current_tracer\n"
3777 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3778 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3779 	"  trace_clock\t\t- change the clock used to order events\n"
3780 	"       local:   Per cpu clock but may not be synced across CPUs\n"
3781 	"      global:   Synced across CPUs but slows tracing down.\n"
3782 	"     counter:   Not a clock, but just an increment\n"
3783 	"      uptime:   Jiffy counter from time of boot\n"
3784 	"        perf:   Same clock that perf events use\n"
3785 #ifdef CONFIG_X86_64
3786 	"     x86-tsc:   TSC cycle counter\n"
3787 #endif
3788 	"\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
3789 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
3790 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3791 	"\t\t\t  Remove sub-buffer with rmdir\n"
3792 	"  trace_options\t\t- Set format or modify how tracing happens\n"
3793 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
3794 	"\t\t\t  option name\n"
3795 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
3796 #ifdef CONFIG_DYNAMIC_FTRACE
3797 	"\n  available_filter_functions - list of functions that can be filtered on\n"
3798 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
3799 	"\t\t\t  functions\n"
3800 	"\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3801 	"\t     modules: Can select a group via module\n"
3802 	"\t      Format: :mod:<module-name>\n"
3803 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3804 	"\t    triggers: a command to perform when function is hit\n"
3805 	"\t      Format: <function>:<trigger>[:count]\n"
3806 	"\t     trigger: traceon, traceoff\n"
3807 	"\t\t      enable_event:<system>:<event>\n"
3808 	"\t\t      disable_event:<system>:<event>\n"
3809 #ifdef CONFIG_STACKTRACE
3810 	"\t\t      stacktrace\n"
3811 #endif
3812 #ifdef CONFIG_TRACER_SNAPSHOT
3813 	"\t\t      snapshot\n"
3814 #endif
3815 	"\t\t      dump\n"
3816 	"\t\t      cpudump\n"
3817 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3818 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3819 	"\t     The first one will disable tracing every time do_fault is hit\n"
3820 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3821 	"\t       The first time do_trap is hit and it disables tracing, the\n"
3822 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
3823 	"\t       the counter will not decrement. It only decrements when the\n"
3824 	"\t       trigger did work\n"
3825 	"\t     To remove a trigger without a count:\n"
3826 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3827 	"\t     To remove a trigger with a count:\n"
3828 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3829 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3830 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3831 	"\t    modules: Can select a group via module command :mod:\n"
3832 	"\t    Does not accept triggers\n"
3833 #endif /* CONFIG_DYNAMIC_FTRACE */
3834 #ifdef CONFIG_FUNCTION_TRACER
3835 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3836 	"\t\t    (function)\n"
3837 #endif
3838 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3839 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3840 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3841 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3842 #endif
3843 #ifdef CONFIG_TRACER_SNAPSHOT
3844 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3845 	"\t\t\t  snapshot buffer. Read the contents for more\n"
3846 	"\t\t\t  information\n"
3847 #endif
3848 #ifdef CONFIG_STACK_TRACER
3849 	"  stack_trace\t\t- Shows the max stack trace when active\n"
3850 	"  stack_max_size\t- Shows current max stack size that was traced\n"
3851 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
3852 	"\t\t\t  new trace)\n"
3853 #ifdef CONFIG_DYNAMIC_FTRACE
3854 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3855 	"\t\t\t  traces\n"
3856 #endif
3857 #endif /* CONFIG_STACK_TRACER */
3858 	"  events/\t\t- Directory containing all trace event subsystems:\n"
3859 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3860 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
3861 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3862 	"\t\t\t  events\n"
3863 	"      filter\t\t- If set, only events passing filter are traced\n"
3864 	"  events/<system>/<event>/\t- Directory containing control files for\n"
3865 	"\t\t\t  <event>:\n"
3866 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3867 	"      filter\t\t- If set, only events passing filter are traced\n"
3868 	"      trigger\t\t- If set, a command to perform when event is hit\n"
3869 	"\t    Format: <trigger>[:count][if <filter>]\n"
3870 	"\t   trigger: traceon, traceoff\n"
3871 	"\t            enable_event:<system>:<event>\n"
3872 	"\t            disable_event:<system>:<event>\n"
3873 #ifdef CONFIG_STACKTRACE
3874 	"\t\t    stacktrace\n"
3875 #endif
3876 #ifdef CONFIG_TRACER_SNAPSHOT
3877 	"\t\t    snapshot\n"
3878 #endif
3879 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3880 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3881 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3882 	"\t                  events/block/block_unplug/trigger\n"
3883 	"\t   The first disables tracing every time block_unplug is hit.\n"
3884 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3885 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3886 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
3887 	"\t   Like function triggers, the counter is only decremented if it\n"
3888 	"\t    enabled or disabled tracing.\n"
3889 	"\t   To remove a trigger without a count:\n"
3890 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3891 	"\t   To remove a trigger with a count:\n"
3892 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3893 	"\t   Filters can be ignored when removing a trigger.\n"
3894 ;
3895 
3896 static ssize_t
3897 tracing_readme_read(struct file *filp, char __user *ubuf,
3898 		       size_t cnt, loff_t *ppos)
3899 {
3900 	return simple_read_from_buffer(ubuf, cnt, ppos,
3901 					readme_msg, strlen(readme_msg));
3902 }
3903 
3904 static const struct file_operations tracing_readme_fops = {
3905 	.open		= tracing_open_generic,
3906 	.read		= tracing_readme_read,
3907 	.llseek		= generic_file_llseek,
3908 };
3909 
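/*
 * seq_file iterator for the saved_cmdlines file: walk the
 * map_cmdline_to_pid array and skip slots that never recorded a pid.
 */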
3910 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3911 {
3912 	unsigned int *ptr = v;
3913 
3914 	if (*pos || m->count)
3915 		ptr++;
3916 
3917 	(*pos)++;
3918 
3919 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3920 	     ptr++) {
3921 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3922 			continue;
3923 
3924 		return ptr;
3925 	}
3926 
3927 	return NULL;
3928 }
3929 
3930 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3931 {
3932 	void *v;
3933 	loff_t l = 0;
3934 
3935 	preempt_disable();
3936 	arch_spin_lock(&trace_cmdline_lock);
3937 
3938 	v = &savedcmd->map_cmdline_to_pid[0];
3939 	while (l <= *pos) {
3940 		v = saved_cmdlines_next(m, v, &l);
3941 		if (!v)
3942 			return NULL;
3943 	}
3944 
3945 	return v;
3946 }
3947 
3948 static void saved_cmdlines_stop(struct seq_file *m, void *v)
3949 {
3950 	arch_spin_unlock(&trace_cmdline_lock);
3951 	preempt_enable();
3952 }
3953 
3954 static int saved_cmdlines_show(struct seq_file *m, void *v)
3955 {
3956 	char buf[TASK_COMM_LEN];
3957 	unsigned int *pid = v;
3958 
3959 	__trace_find_cmdline(*pid, buf);
3960 	seq_printf(m, "%d %s\n", *pid, buf);
3961 	return 0;
3962 }
3963 
3964 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3965 	.start		= saved_cmdlines_start,
3966 	.next		= saved_cmdlines_next,
3967 	.stop		= saved_cmdlines_stop,
3968 	.show		= saved_cmdlines_show,
3969 };
3970 
3971 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3972 {
3973 	if (tracing_disabled)
3974 		return -ENODEV;
3975 
3976 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3977 }
3978 
3979 static const struct file_operations tracing_saved_cmdlines_fops = {
3980 	.open		= tracing_saved_cmdlines_open,
3981 	.read		= seq_read,
3982 	.llseek		= seq_lseek,
3983 	.release	= seq_release,
3984 };
3985 
3986 static ssize_t
3987 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3988 				 size_t cnt, loff_t *ppos)
3989 {
3990 	char buf[64];
3991 	int r;
3992 
3993 	arch_spin_lock(&trace_cmdline_lock);
3994 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3995 	arch_spin_unlock(&trace_cmdline_lock);
3996 
3997 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3998 }
3999 
4000 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4001 {
4002 	kfree(s->saved_cmdlines);
4003 	kfree(s->map_cmdline_to_pid);
4004 	kfree(s);
4005 }
4006 
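/*
 * Replace the saved_cmdlines buffer with one sized for @val entries:
 * allocate the new buffer first, publish it under trace_cmdline_lock,
 * then free the old buffer once the lock is dropped.
 */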
4007 static int tracing_resize_saved_cmdlines(unsigned int val)
4008 {
4009 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
4010 
4011 	s = kmalloc(sizeof(*s), GFP_KERNEL);
4012 	if (!s)
4013 		return -ENOMEM;
4014 
4015 	if (allocate_cmdlines_buffer(val, s) < 0) {
4016 		kfree(s);
4017 		return -ENOMEM;
4018 	}
4019 
4020 	arch_spin_lock(&trace_cmdline_lock);
4021 	savedcmd_temp = savedcmd;
4022 	savedcmd = s;
4023 	arch_spin_unlock(&trace_cmdline_lock);
4024 	free_saved_cmdlines_buffer(savedcmd_temp);
4025 
4026 	return 0;
4027 }
4028 
4029 static ssize_t
4030 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4031 				  size_t cnt, loff_t *ppos)
4032 {
4033 	unsigned long val;
4034 	int ret;
4035 
4036 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4037 	if (ret)
4038 		return ret;
4039 
4040 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4041 	if (!val || val > PID_MAX_DEFAULT)
4042 		return -EINVAL;
4043 
4044 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
4045 	if (ret < 0)
4046 		return ret;
4047 
4048 	*ppos += cnt;
4049 
4050 	return cnt;
4051 }
4052 
4053 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4054 	.open		= tracing_open_generic,
4055 	.read		= tracing_saved_cmdlines_size_read,
4056 	.write		= tracing_saved_cmdlines_size_write,
4057 };
4058 
4059 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
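/*
 * trace_enum_maps is a chain of arrays, each bracketed by a head and a
 * tail bookkeeping item.  update_enum_map() makes the seq_file iterator
 * skip those items: when it lands on a tail (enum_string == NULL) it
 * follows tail.next and steps past the next array's head.
 */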
4060 static union trace_enum_map_item *
4061 update_enum_map(union trace_enum_map_item *ptr)
4062 {
4063 	if (!ptr->map.enum_string) {
4064 		if (ptr->tail.next) {
4065 			ptr = ptr->tail.next;
4066 			/* Set ptr to the next real item (skip head) */
4067 			ptr++;
4068 		} else
4069 			return NULL;
4070 	}
4071 	return ptr;
4072 }
4073 
4074 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4075 {
4076 	union trace_enum_map_item *ptr = v;
4077 
4078 	/*
4079 	 * Paranoid! If ptr points to end, we don't want to increment past it.
4080 	 * This really should never happen.
4081 	 */
4082 	ptr = update_enum_map(ptr);
4083 	if (WARN_ON_ONCE(!ptr))
4084 		return NULL;
4085 
4086 	ptr++;
4087 
4088 	(*pos)++;
4089 
4090 	ptr = update_enum_map(ptr);
4091 
4092 	return ptr;
4093 }
4094 
4095 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4096 {
4097 	union trace_enum_map_item *v;
4098 	loff_t l = 0;
4099 
4100 	mutex_lock(&trace_enum_mutex);
4101 
4102 	v = trace_enum_maps;
4103 	if (v)
4104 		v++;
4105 
4106 	while (v && l < *pos) {
4107 		v = enum_map_next(m, v, &l);
4108 	}
4109 
4110 	return v;
4111 }
4112 
4113 static void enum_map_stop(struct seq_file *m, void *v)
4114 {
4115 	mutex_unlock(&trace_enum_mutex);
4116 }
4117 
4118 static int enum_map_show(struct seq_file *m, void *v)
4119 {
4120 	union trace_enum_map_item *ptr = v;
4121 
4122 	seq_printf(m, "%s %ld (%s)\n",
4123 		   ptr->map.enum_string, ptr->map.enum_value,
4124 		   ptr->map.system);
4125 
4126 	return 0;
4127 }
4128 
4129 static const struct seq_operations tracing_enum_map_seq_ops = {
4130 	.start		= enum_map_start,
4131 	.next		= enum_map_next,
4132 	.stop		= enum_map_stop,
4133 	.show		= enum_map_show,
4134 };
4135 
4136 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4137 {
4138 	if (tracing_disabled)
4139 		return -ENODEV;
4140 
4141 	return seq_open(filp, &tracing_enum_map_seq_ops);
4142 }
4143 
4144 static const struct file_operations tracing_enum_map_fops = {
4145 	.open		= tracing_enum_map_open,
4146 	.read		= seq_read,
4147 	.llseek		= seq_lseek,
4148 	.release	= seq_release,
4149 };
4150 
4151 static inline union trace_enum_map_item *
4152 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4153 {
4154 	/* Return tail of array given the head */
4155 	return ptr + ptr->head.length + 1;
4156 }
4157 
4158 static void
4159 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4160 			   int len)
4161 {
4162 	struct trace_enum_map **stop;
4163 	struct trace_enum_map **map;
4164 	union trace_enum_map_item *map_array;
4165 	union trace_enum_map_item *ptr;
4166 
4167 	stop = start + len;
4168 
4169 	/*
4170 	 * The trace_enum_maps contains the map plus a head and tail item,
4171 	 * where the head holds the module and the length of the array, and the
4172 	 * tail holds a pointer to the next list.
4173 	 */
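	/*
	 * Layout of one allocation (len + 2 items):
	 *
	 *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
	 *
	 * The memset() below zeroes the tail item, so tail.next terminates
	 * the chain until another module's array is linked onto it.
	 */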
4174 	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4175 	if (!map_array) {
4176 		pr_warning("Unable to allocate trace enum mapping\n");
4177 		return;
4178 	}
4179 
4180 	mutex_lock(&trace_enum_mutex);
4181 
4182 	if (!trace_enum_maps)
4183 		trace_enum_maps = map_array;
4184 	else {
4185 		ptr = trace_enum_maps;
4186 		for (;;) {
4187 			ptr = trace_enum_jmp_to_tail(ptr);
4188 			if (!ptr->tail.next)
4189 				break;
4190 			ptr = ptr->tail.next;
4191 
4192 		}
4193 		ptr->tail.next = map_array;
4194 	}
4195 	map_array->head.mod = mod;
4196 	map_array->head.length = len;
4197 	map_array++;
4198 
4199 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4200 		map_array->map = **map;
4201 		map_array++;
4202 	}
4203 	memset(map_array, 0, sizeof(*map_array));
4204 
4205 	mutex_unlock(&trace_enum_mutex);
4206 }
4207 
4208 static void trace_create_enum_file(struct dentry *d_tracer)
4209 {
4210 	trace_create_file("enum_map", 0444, d_tracer,
4211 			  NULL, &tracing_enum_map_fops);
4212 }
4213 
4214 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4215 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4216 static inline void trace_insert_enum_map_file(struct module *mod,
4217 			      struct trace_enum_map **start, int len) { }
4218 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4219 
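/*
 * Register a block of enum mappings: let the event code rewrite fields
 * that use them, then (when CONFIG_TRACE_ENUM_MAP_FILE is set) expose
 * the mappings through the enum_map file.
 */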
4220 static void trace_insert_enum_map(struct module *mod,
4221 				  struct trace_enum_map **start, int len)
4222 {
4223 	struct trace_enum_map **map;
4224 
4225 	if (len <= 0)
4226 		return;
4227 
4228 	map = start;
4229 
4230 	trace_event_enum_update(map, len);
4231 
4232 	trace_insert_enum_map_file(mod, start, len);
4233 }
4234 
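/*
 * Reading saved_tgids dumps the cached pid -> tgid mapping, one
 * "pid tgid" pair per line, for every occupied slot in the saved
 * cmdline map.
 */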
4235 static ssize_t
4236 tracing_saved_tgids_read(struct file *file, char __user *ubuf,
4237 				size_t cnt, loff_t *ppos)
4238 {
4239 	char *file_buf;
4240 	char *buf;
4241 	int len = 0;
4242 	int pid;
4243 	int i;
4244 
4245 	file_buf = kmalloc(SAVED_CMDLINES_DEFAULT*(16+1+16), GFP_KERNEL);
4246 	if (!file_buf)
4247 		return -ENOMEM;
4248 
4249 	buf = file_buf;
4250 
4251 	for (i = 0; i < SAVED_CMDLINES_DEFAULT; i++) {
4252 		int tgid;
4253 		int r;
4254 
4255 		pid = savedcmd->map_cmdline_to_pid[i];
4256 		if (pid == -1 || pid == NO_CMDLINE_MAP)
4257 			continue;
4258 
4259 		tgid = trace_find_tgid(pid);
4260 		r = sprintf(buf, "%d %d\n", pid, tgid);
4261 		buf += r;
4262 		len += r;
4263 	}
4264 
4265 	len = simple_read_from_buffer(ubuf, cnt, ppos,
4266 				      file_buf, len);
4267 
4268 	kfree(file_buf);
4269 
4270 	return len;
4271 }
4272 
4273 static const struct file_operations tracing_saved_tgids_fops = {
4274 	.open	= tracing_open_generic,
4275 	.read	= tracing_saved_tgids_read,
4276 	.llseek	= generic_file_llseek,
4277 };
4278 
4279 static ssize_t
4280 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4281 		       size_t cnt, loff_t *ppos)
4282 {
4283 	struct trace_array *tr = filp->private_data;
4284 	char buf[MAX_TRACER_SIZE+2];
4285 	int r;
4286 
4287 	mutex_lock(&trace_types_lock);
4288 	r = sprintf(buf, "%s\n", tr->current_trace->name);
4289 	mutex_unlock(&trace_types_lock);
4290 
4291 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4292 }
4293 
4294 int tracer_init(struct tracer *t, struct trace_array *tr)
4295 {
4296 	tracing_reset_online_cpus(&tr->trace_buffer);
4297 	return t->init(tr);
4298 }
4299 
4300 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4301 {
4302 	int cpu;
4303 
4304 	for_each_tracing_cpu(cpu)
4305 		per_cpu_ptr(buf->data, cpu)->entries = val;
4306 }
4307 
4308 #ifdef CONFIG_TRACER_MAX_TRACE
4309 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4310 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4311 					struct trace_buffer *size_buf, int cpu_id)
4312 {
4313 	int cpu, ret = 0;
4314 
4315 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
4316 		for_each_tracing_cpu(cpu) {
4317 			ret = ring_buffer_resize(trace_buf->buffer,
4318 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4319 			if (ret < 0)
4320 				break;
4321 			per_cpu_ptr(trace_buf->data, cpu)->entries =
4322 				per_cpu_ptr(size_buf->data, cpu)->entries;
4323 		}
4324 	} else {
4325 		ret = ring_buffer_resize(trace_buf->buffer,
4326 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4327 		if (ret == 0)
4328 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4329 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
4330 	}
4331 
4332 	return ret;
4333 }
4334 #endif /* CONFIG_TRACER_MAX_TRACE */
4335 
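/*
 * Resize the ring buffer for one CPU or for all of them.  When the
 * current tracer uses the max (snapshot) buffer on the global array,
 * keep that buffer the same size; if resizing it fails, try to put the
 * main buffer back, and give up on tracing entirely if even that fails.
 */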
4336 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4337 					unsigned long size, int cpu)
4338 {
4339 	int ret;
4340 
4341 	/*
4342 	 * If kernel or user changes the size of the ring buffer
4343 	 * we use the size that was given, and we can forget about
4344 	 * expanding it later.
4345 	 */
4346 	ring_buffer_expanded = true;
4347 
4348 	/* May be called before buffers are initialized */
4349 	if (!tr->trace_buffer.buffer)
4350 		return 0;
4351 
4352 	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4353 	if (ret < 0)
4354 		return ret;
4355 
4356 #ifdef CONFIG_TRACER_MAX_TRACE
4357 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4358 	    !tr->current_trace->use_max_tr)
4359 		goto out;
4360 
4361 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4362 	if (ret < 0) {
4363 		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4364 						     &tr->trace_buffer, cpu);
4365 		if (r < 0) {
4366 			/*
4367 			 * AARGH! We are left with a different
4368 			 * sized max buffer!
4369 			 * The max buffer is our "snapshot" buffer.
4370 			 * When a tracer needs a snapshot (one of the
4371 			 * latency tracers), it swaps the max buffer
4372 			 * with the saved snapshot. We succeeded in
4373 			 * updating the size of the main buffer, but failed to
4374 			 * update the size of the max buffer. But when we tried
4375 			 * to reset the main buffer to the original size, we
4376 			 * failed there too. This is very unlikely to
4377 			 * happen, but if it does, warn and kill all
4378 			 * tracing.
4379 			 */
4380 			WARN_ON(1);
4381 			tracing_disabled = 1;
4382 		}
4383 		return ret;
4384 	}
4385 
4386 	if (cpu == RING_BUFFER_ALL_CPUS)
4387 		set_buffer_entries(&tr->max_buffer, size);
4388 	else
4389 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4390 
4391  out:
4392 #endif /* CONFIG_TRACER_MAX_TRACE */
4393 
4394 	if (cpu == RING_BUFFER_ALL_CPUS)
4395 		set_buffer_entries(&tr->trace_buffer, size);
4396 	else
4397 		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4398 
4399 	return ret;
4400 }
4401 
4402 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4403 					  unsigned long size, int cpu_id)
4404 {
4405 	int ret = size;
4406 
4407 	mutex_lock(&trace_types_lock);
4408 
4409 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
4410 		/* make sure this cpu is enabled in the mask */
4411 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4412 			ret = -EINVAL;
4413 			goto out;
4414 		}
4415 	}
4416 
4417 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4418 	if (ret < 0)
4419 		ret = -ENOMEM;
4420 
4421 out:
4422 	mutex_unlock(&trace_types_lock);
4423 
4424 	return ret;
4425 }
4426 
4427 
4428 /**
4429  * tracing_update_buffers - used by tracing facility to expand ring buffers
4430  *
4431  * To save on memory when tracing is never used on a system with it
4432  * configured in, the ring buffers are set to a minimum size. But once
4433  * a user starts to use the tracing facility, they need to grow
4434  * to their default size.
4435  *
4436  * This function is to be called when a tracer is about to be used.
4437  */
4438 int tracing_update_buffers(void)
4439 {
4440 	int ret = 0;
4441 
4442 	mutex_lock(&trace_types_lock);
4443 	if (!ring_buffer_expanded)
4444 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4445 						RING_BUFFER_ALL_CPUS);
4446 	mutex_unlock(&trace_types_lock);
4447 
4448 	return ret;
4449 }
4450 
4451 struct trace_option_dentry;
4452 
4453 static void
4454 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4455 
4456 /*
4457  * Used to clear out the tracer before deletion of an instance.
4458  * Must have trace_types_lock held.
4459  */
4460 static void tracing_set_nop(struct trace_array *tr)
4461 {
4462 	if (tr->current_trace == &nop_trace)
4463 		return;
4464 
4465 	tr->current_trace->enabled--;
4466 
4467 	if (tr->current_trace->reset)
4468 		tr->current_trace->reset(tr);
4469 
4470 	tr->current_trace = &nop_trace;
4471 }
4472 
4473 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4474 {
4475 	/* Only enable if the directory has been created already. */
4476 	if (!tr->dir)
4477 		return;
4478 
4479 	create_trace_option_files(tr, t);
4480 }
4481 
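/*
 * Switch the current tracer: expand the ring buffer if it is still at
 * its boot-time minimum, refuse while trace_pipe readers hold a
 * reference, tear down the old tracer, allocate or free the snapshot
 * buffer as the new tracer requires, then initialize and enable it.
 */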
4482 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4483 {
4484 	struct tracer *t;
4485 #ifdef CONFIG_TRACER_MAX_TRACE
4486 	bool had_max_tr;
4487 #endif
4488 	int ret = 0;
4489 
4490 	mutex_lock(&trace_types_lock);
4491 
4492 	if (!ring_buffer_expanded) {
4493 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4494 						RING_BUFFER_ALL_CPUS);
4495 		if (ret < 0)
4496 			goto out;
4497 		ret = 0;
4498 	}
4499 
4500 	for (t = trace_types; t; t = t->next) {
4501 		if (strcmp(t->name, buf) == 0)
4502 			break;
4503 	}
4504 	if (!t) {
4505 		ret = -EINVAL;
4506 		goto out;
4507 	}
4508 	if (t == tr->current_trace)
4509 		goto out;
4510 
4511 	/* Some tracers are only allowed for the top level buffer */
4512 	if (!trace_ok_for_array(t, tr)) {
4513 		ret = -EINVAL;
4514 		goto out;
4515 	}
4516 
4517 	/* If trace pipe files are being read, we can't change the tracer */
4518 	if (tr->current_trace->ref) {
4519 		ret = -EBUSY;
4520 		goto out;
4521 	}
4522 
4523 	trace_branch_disable();
4524 
4525 	tr->current_trace->enabled--;
4526 
4527 	if (tr->current_trace->reset)
4528 		tr->current_trace->reset(tr);
4529 
4530 	/* Current trace needs to be nop_trace before synchronize_sched */
4531 	tr->current_trace = &nop_trace;
4532 
4533 #ifdef CONFIG_TRACER_MAX_TRACE
4534 	had_max_tr = tr->allocated_snapshot;
4535 
4536 	if (had_max_tr && !t->use_max_tr) {
4537 		/*
4538 		 * We need to make sure that the update_max_tr sees that
4539 		 * current_trace changed to nop_trace to keep it from
4540 		 * swapping the buffers after we resize it.
4541 		 * The update_max_tr is called with interrupts disabled,
4542 		 * so a synchronize_sched() is sufficient.
4543 		 */
4544 		synchronize_sched();
4545 		free_snapshot(tr);
4546 	}
4547 #endif
4548 
4549 #ifdef CONFIG_TRACER_MAX_TRACE
4550 	if (t->use_max_tr && !had_max_tr) {
4551 		ret = alloc_snapshot(tr);
4552 		if (ret < 0)
4553 			goto out;
4554 	}
4555 #endif
4556 
4557 	if (t->init) {
4558 		ret = tracer_init(t, tr);
4559 		if (ret)
4560 			goto out;
4561 	}
4562 
4563 	tr->current_trace = t;
4564 	tr->current_trace->enabled++;
4565 	trace_branch_enable(tr);
4566  out:
4567 	mutex_unlock(&trace_types_lock);
4568 
4569 	return ret;
4570 }
4571 
4572 static ssize_t
4573 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4574 			size_t cnt, loff_t *ppos)
4575 {
4576 	struct trace_array *tr = filp->private_data;
4577 	char buf[MAX_TRACER_SIZE+1];
4578 	int i;
4579 	size_t ret;
4580 	int err;
4581 
4582 	ret = cnt;
4583 
4584 	if (cnt > MAX_TRACER_SIZE)
4585 		cnt = MAX_TRACER_SIZE;
4586 
4587 	if (copy_from_user(&buf, ubuf, cnt))
4588 		return -EFAULT;
4589 
4590 	buf[cnt] = 0;
4591 
4592 	/* strip ending whitespace. */
4593 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4594 		buf[i] = 0;
4595 
4596 	err = tracing_set_tracer(tr, buf);
4597 	if (err)
4598 		return err;
4599 
4600 	*ppos += ret;
4601 
4602 	return ret;
4603 }
4604 
4605 static ssize_t
4606 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4607 		   size_t cnt, loff_t *ppos)
4608 {
4609 	char buf[64];
4610 	int r;
4611 
4612 	r = snprintf(buf, sizeof(buf), "%ld\n",
4613 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4614 	if (r > sizeof(buf))
4615 		r = sizeof(buf);
4616 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4617 }
4618 
4619 static ssize_t
4620 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4621 		    size_t cnt, loff_t *ppos)
4622 {
4623 	unsigned long val;
4624 	int ret;
4625 
4626 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4627 	if (ret)
4628 		return ret;
4629 
4630 	*ptr = val * 1000;
4631 
4632 	return cnt;
4633 }
4634 
4635 static ssize_t
4636 tracing_thresh_read(struct file *filp, char __user *ubuf,
4637 		    size_t cnt, loff_t *ppos)
4638 {
4639 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4640 }
4641 
4642 static ssize_t
4643 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4644 		     size_t cnt, loff_t *ppos)
4645 {
4646 	struct trace_array *tr = filp->private_data;
4647 	int ret;
4648 
4649 	mutex_lock(&trace_types_lock);
4650 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4651 	if (ret < 0)
4652 		goto out;
4653 
4654 	if (tr->current_trace->update_thresh) {
4655 		ret = tr->current_trace->update_thresh(tr);
4656 		if (ret < 0)
4657 			goto out;
4658 	}
4659 
4660 	ret = cnt;
4661 out:
4662 	mutex_unlock(&trace_types_lock);
4663 
4664 	return ret;
4665 }
4666 
4667 #ifdef CONFIG_TRACER_MAX_TRACE
4668 
4669 static ssize_t
4670 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4671 		     size_t cnt, loff_t *ppos)
4672 {
4673 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4674 }
4675 
4676 static ssize_t
4677 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4678 		      size_t cnt, loff_t *ppos)
4679 {
4680 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4681 }
4682 
4683 #endif
4684 
4685 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4686 {
4687 	struct trace_array *tr = inode->i_private;
4688 	struct trace_iterator *iter;
4689 	int ret = 0;
4690 
4691 	if (tracing_disabled)
4692 		return -ENODEV;
4693 
4694 	if (trace_array_get(tr) < 0)
4695 		return -ENODEV;
4696 
4697 	mutex_lock(&trace_types_lock);
4698 
4699 	/* create a buffer to store the information to pass to userspace */
4700 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4701 	if (!iter) {
4702 		ret = -ENOMEM;
4703 		__trace_array_put(tr);
4704 		goto out;
4705 	}
4706 
4707 	trace_seq_init(&iter->seq);
4708 	iter->trace = tr->current_trace;
4709 
4710 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4711 		ret = -ENOMEM;
4712 		goto fail;
4713 	}
4714 
4715 	/* trace pipe does not show start of buffer */
4716 	cpumask_setall(iter->started);
4717 
4718 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4719 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
4720 
4721 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4722 	if (trace_clocks[tr->clock_id].in_ns)
4723 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4724 
4725 	iter->tr = tr;
4726 	iter->trace_buffer = &tr->trace_buffer;
4727 	iter->cpu_file = tracing_get_cpu(inode);
4728 	mutex_init(&iter->mutex);
4729 	filp->private_data = iter;
4730 
4731 	if (iter->trace->pipe_open)
4732 		iter->trace->pipe_open(iter);
4733 
4734 	nonseekable_open(inode, filp);
4735 
4736 	tr->current_trace->ref++;
4737 out:
4738 	mutex_unlock(&trace_types_lock);
4739 	return ret;
4740 
4741 fail:
4742 	kfree(iter);
4743 	__trace_array_put(tr);
4744 	mutex_unlock(&trace_types_lock);
4745 	return ret;
4746 }
4747 
4748 static int tracing_release_pipe(struct inode *inode, struct file *file)
4749 {
4750 	struct trace_iterator *iter = file->private_data;
4751 	struct trace_array *tr = inode->i_private;
4752 
4753 	mutex_lock(&trace_types_lock);
4754 
4755 	tr->current_trace->ref--;
4756 
4757 	if (iter->trace->pipe_close)
4758 		iter->trace->pipe_close(iter);
4759 
4760 	mutex_unlock(&trace_types_lock);
4761 
4762 	free_cpumask_var(iter->started);
4763 	mutex_destroy(&iter->mutex);
4764 	kfree(iter);
4765 
4766 	trace_array_put(tr);
4767 
4768 	return 0;
4769 }
4770 
4771 static unsigned int
4772 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4773 {
4774 	struct trace_array *tr = iter->tr;
4775 
4776 	/* Iterators are static, they should be filled or empty */
4777 	if (trace_buffer_iter(iter, iter->cpu_file))
4778 		return POLLIN | POLLRDNORM;
4779 
4780 	if (tr->trace_flags & TRACE_ITER_BLOCK)
4781 		/*
4782 		 * Always select as readable when in blocking mode
4783 		 */
4784 		return POLLIN | POLLRDNORM;
4785 	else
4786 		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4787 					     filp, poll_table);
4788 }
4789 
4790 static unsigned int
4791 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4792 {
4793 	struct trace_iterator *iter = filp->private_data;
4794 
4795 	return trace_poll(iter, filp, poll_table);
4796 }
4797 
4798 /* Must be called with iter->mutex held. */
4799 static int tracing_wait_pipe(struct file *filp)
4800 {
4801 	struct trace_iterator *iter = filp->private_data;
4802 	int ret;
4803 
4804 	while (trace_empty(iter)) {
4805 
4806 		if ((filp->f_flags & O_NONBLOCK)) {
4807 			return -EAGAIN;
4808 		}
4809 
4810 		/*
4811 		 * We block until we read something and tracing is disabled.
4812 		 * We still block if tracing is disabled, but we have never
4813 		 * read anything. This allows a user to cat this file, and
4814 		 * then enable tracing. But after we have read something,
4815 		 * we give an EOF when tracing is again disabled.
4816 		 *
4817 		 * iter->pos will be 0 if we haven't read anything.
4818 		 */
4819 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
4820 			break;
4821 
4822 		mutex_unlock(&iter->mutex);
4823 
4824 		ret = wait_on_pipe(iter, false);
4825 
4826 		mutex_lock(&iter->mutex);
4827 
4828 		if (ret)
4829 			return ret;
4830 	}
4831 
4832 	return 1;
4833 }
4834 
4835 /*
4836  * Consumer reader.
4837  */
4838 static ssize_t
4839 tracing_read_pipe(struct file *filp, char __user *ubuf,
4840 		  size_t cnt, loff_t *ppos)
4841 {
4842 	struct trace_iterator *iter = filp->private_data;
4843 	ssize_t sret;
4844 
4845 	/*
4846 	 * Avoid more than one consumer on a single file descriptor.
4847 	 * This is just a matter of trace coherency; the ring buffer itself
4848 	 * is protected.
4849 	 */
4850 	mutex_lock(&iter->mutex);
4851 
4852 	/* return any leftover data */
4853 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4854 	if (sret != -EBUSY)
4855 		goto out;
4856 
4857 	trace_seq_init(&iter->seq);
4858 
4859 	if (iter->trace->read) {
4860 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4861 		if (sret)
4862 			goto out;
4863 	}
4864 
4865 waitagain:
4866 	sret = tracing_wait_pipe(filp);
4867 	if (sret <= 0)
4868 		goto out;
4869 
4870 	/* stop when tracing is finished */
4871 	if (trace_empty(iter)) {
4872 		sret = 0;
4873 		goto out;
4874 	}
4875 
4876 	if (cnt >= PAGE_SIZE)
4877 		cnt = PAGE_SIZE - 1;
4878 
4879 	/* reset all but tr, trace, and overruns */
4880 	memset(&iter->seq, 0,
4881 	       sizeof(struct trace_iterator) -
4882 	       offsetof(struct trace_iterator, seq));
4883 	cpumask_clear(iter->started);
4884 	trace_seq_init(&iter->seq);
4885 	iter->pos = -1;
4886 
4887 	trace_event_read_lock();
4888 	trace_access_lock(iter->cpu_file);
4889 	while (trace_find_next_entry_inc(iter) != NULL) {
4890 		enum print_line_t ret;
4891 		int save_len = iter->seq.seq.len;
4892 
4893 		ret = print_trace_line(iter);
4894 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4895 			/* don't print partial lines */
4896 			iter->seq.seq.len = save_len;
4897 			break;
4898 		}
4899 		if (ret != TRACE_TYPE_NO_CONSUME)
4900 			trace_consume(iter);
4901 
4902 		if (trace_seq_used(&iter->seq) >= cnt)
4903 			break;
4904 
4905 		/*
4906 		 * Setting the full flag means we reached the trace_seq buffer
4907 		 * size and we should have left via the partial output condition above.
4908 		 * One of the trace_seq_* functions is not used properly.
4909 		 */
4910 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4911 			  iter->ent->type);
4912 	}
4913 	trace_access_unlock(iter->cpu_file);
4914 	trace_event_read_unlock();
4915 
4916 	/* Now copy what we have to the user */
4917 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4918 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
4919 		trace_seq_init(&iter->seq);
4920 
4921 	/*
4922 	 * If there was nothing to send to user, in spite of consuming trace
4923 	 * entries, go back to wait for more entries.
4924 	 */
4925 	if (sret == -EBUSY)
4926 		goto waitagain;
4927 
4928 out:
4929 	mutex_unlock(&iter->mutex);
4930 
4931 	return sret;
4932 }
4933 
4934 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4935 				     unsigned int idx)
4936 {
4937 	__free_page(spd->pages[idx]);
4938 }
4939 
4940 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4941 	.can_merge		= 0,
4942 	.confirm		= generic_pipe_buf_confirm,
4943 	.release		= generic_pipe_buf_release,
4944 	.steal			= generic_pipe_buf_steal,
4945 	.get			= generic_pipe_buf_get,
4946 };
4947 
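/*
 * Format trace entries into iter->seq until either the page-sized seq
 * buffer fills up or @rem bytes have been produced.  Entries are
 * consumed as they are printed.  Returns how much of @rem is left.
 */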
4948 static size_t
4949 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4950 {
4951 	size_t count;
4952 	int save_len;
4953 	int ret;
4954 
4955 	/* Seq buffer is page-sized, exactly what we need. */
4956 	for (;;) {
4957 		save_len = iter->seq.seq.len;
4958 		ret = print_trace_line(iter);
4959 
4960 		if (trace_seq_has_overflowed(&iter->seq)) {
4961 			iter->seq.seq.len = save_len;
4962 			break;
4963 		}
4964 
4965 		/*
4966 		 * This should not be hit, because it should only
4967 		 * be set if the iter->seq overflowed. But check it
4968 		 * anyway to be safe.
4969 		 */
4970 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4971 			iter->seq.seq.len = save_len;
4972 			break;
4973 		}
4974 
4975 		count = trace_seq_used(&iter->seq) - save_len;
4976 		if (rem < count) {
4977 			rem = 0;
4978 			iter->seq.seq.len = save_len;
4979 			break;
4980 		}
4981 
4982 		if (ret != TRACE_TYPE_NO_CONSUME)
4983 			trace_consume(iter);
4984 		rem -= count;
4985 		if (!trace_find_next_entry_inc(iter))	{
4986 			rem = 0;
4987 			iter->ent = NULL;
4988 			break;
4989 		}
4990 	}
4991 
4992 	return rem;
4993 }
4994 
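/*
 * splice() backend for trace_pipe: wait for data, then fill pipe pages
 * one at a time via tracing_fill_pipe_page() and hand the result to
 * splice_to_pipe().
 */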
4995 static ssize_t tracing_splice_read_pipe(struct file *filp,
4996 					loff_t *ppos,
4997 					struct pipe_inode_info *pipe,
4998 					size_t len,
4999 					unsigned int flags)
5000 {
5001 	struct page *pages_def[PIPE_DEF_BUFFERS];
5002 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5003 	struct trace_iterator *iter = filp->private_data;
5004 	struct splice_pipe_desc spd = {
5005 		.pages		= pages_def,
5006 		.partial	= partial_def,
5007 		.nr_pages	= 0, /* This gets updated below. */
5008 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5009 		.flags		= flags,
5010 		.ops		= &tracing_pipe_buf_ops,
5011 		.spd_release	= tracing_spd_release_pipe,
5012 	};
5013 	ssize_t ret;
5014 	size_t rem;
5015 	unsigned int i;
5016 
5017 	if (splice_grow_spd(pipe, &spd))
5018 		return -ENOMEM;
5019 
5020 	mutex_lock(&iter->mutex);
5021 
5022 	if (iter->trace->splice_read) {
5023 		ret = iter->trace->splice_read(iter, filp,
5024 					       ppos, pipe, len, flags);
5025 		if (ret)
5026 			goto out_err;
5027 	}
5028 
5029 	ret = tracing_wait_pipe(filp);
5030 	if (ret <= 0)
5031 		goto out_err;
5032 
5033 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5034 		ret = -EFAULT;
5035 		goto out_err;
5036 	}
5037 
5038 	trace_event_read_lock();
5039 	trace_access_lock(iter->cpu_file);
5040 
5041 	/* Fill as many pages as possible. */
5042 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5043 		spd.pages[i] = alloc_page(GFP_KERNEL);
5044 		if (!spd.pages[i])
5045 			break;
5046 
5047 		rem = tracing_fill_pipe_page(rem, iter);
5048 
5049 		/* Copy the data into the page, so we can start over. */
5050 		ret = trace_seq_to_buffer(&iter->seq,
5051 					  page_address(spd.pages[i]),
5052 					  trace_seq_used(&iter->seq));
5053 		if (ret < 0) {
5054 			__free_page(spd.pages[i]);
5055 			break;
5056 		}
5057 		spd.partial[i].offset = 0;
5058 		spd.partial[i].len = trace_seq_used(&iter->seq);
5059 
5060 		trace_seq_init(&iter->seq);
5061 	}
5062 
5063 	trace_access_unlock(iter->cpu_file);
5064 	trace_event_read_unlock();
5065 	mutex_unlock(&iter->mutex);
5066 
5067 	spd.nr_pages = i;
5068 
5069 	if (i)
5070 		ret = splice_to_pipe(pipe, &spd);
5071 	else
5072 		ret = 0;
5073 out:
5074 	splice_shrink_spd(&spd);
5075 	return ret;
5076 
5077 out_err:
5078 	mutex_unlock(&iter->mutex);
5079 	goto out;
5080 }
5081 
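/*
 * Report the per-cpu buffer size in KB.  For the all-CPUs file a single
 * value is printed only when every CPU buffer is the same size;
 * otherwise "X" is shown.  "(expanded: ...)" marks buffers still at
 * their boot-time minimum size.
 */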
5082 static ssize_t
5083 tracing_entries_read(struct file *filp, char __user *ubuf,
5084 		     size_t cnt, loff_t *ppos)
5085 {
5086 	struct inode *inode = file_inode(filp);
5087 	struct trace_array *tr = inode->i_private;
5088 	int cpu = tracing_get_cpu(inode);
5089 	char buf[64];
5090 	int r = 0;
5091 	ssize_t ret;
5092 
5093 	mutex_lock(&trace_types_lock);
5094 
5095 	if (cpu == RING_BUFFER_ALL_CPUS) {
5096 		int cpu, buf_size_same;
5097 		unsigned long size;
5098 
5099 		size = 0;
5100 		buf_size_same = 1;
5101 		/* check if all cpu sizes are same */
5102 		for_each_tracing_cpu(cpu) {
5103 			/* fill in the size from first enabled cpu */
5104 			if (size == 0)
5105 				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5106 			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5107 				buf_size_same = 0;
5108 				break;
5109 			}
5110 		}
5111 
5112 		if (buf_size_same) {
5113 			if (!ring_buffer_expanded)
5114 				r = sprintf(buf, "%lu (expanded: %lu)\n",
5115 					    size >> 10,
5116 					    trace_buf_size >> 10);
5117 			else
5118 				r = sprintf(buf, "%lu\n", size >> 10);
5119 		} else
5120 			r = sprintf(buf, "X\n");
5121 	} else
5122 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5123 
5124 	mutex_unlock(&trace_types_lock);
5125 
5126 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5127 	return ret;
5128 }
5129 
5130 static ssize_t
5131 tracing_entries_write(struct file *filp, const char __user *ubuf,
5132 		      size_t cnt, loff_t *ppos)
5133 {
5134 	struct inode *inode = file_inode(filp);
5135 	struct trace_array *tr = inode->i_private;
5136 	unsigned long val;
5137 	int ret;
5138 
5139 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5140 	if (ret)
5141 		return ret;
5142 
5143 	/* must have at least 1 entry */
5144 	if (!val)
5145 		return -EINVAL;
5146 
5147 	/* value is in KB */
5148 	val <<= 10;
5149 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5150 	if (ret < 0)
5151 		return ret;
5152 
5153 	*ppos += cnt;
5154 
5155 	return cnt;
5156 }
5157 
5158 static ssize_t
5159 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5160 				size_t cnt, loff_t *ppos)
5161 {
5162 	struct trace_array *tr = filp->private_data;
5163 	char buf[64];
5164 	int r, cpu;
5165 	unsigned long size = 0, expanded_size = 0;
5166 
5167 	mutex_lock(&trace_types_lock);
5168 	for_each_tracing_cpu(cpu) {
5169 		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5170 		if (!ring_buffer_expanded)
5171 			expanded_size += trace_buf_size >> 10;
5172 	}
5173 	if (ring_buffer_expanded)
5174 		r = sprintf(buf, "%lu\n", size);
5175 	else
5176 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5177 	mutex_unlock(&trace_types_lock);
5178 
5179 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5180 }
5181 
5182 static ssize_t
5183 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5184 			  size_t cnt, loff_t *ppos)
5185 {
5186 	/*
5187 	 * There is no need to read what the user has written; this function
5188 	 * just makes sure that there is no error when "echo" is used
5189 	 */
5190 
5191 	*ppos += cnt;
5192 
5193 	return cnt;
5194 }
5195 
5196 static int
5197 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5198 {
5199 	struct trace_array *tr = inode->i_private;
5200 
5201 	/* disable tracing ? */
5202 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5203 		tracer_tracing_off(tr);
5204 	/* resize the ring buffer to 0 */
5205 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5206 
5207 	trace_array_put(tr);
5208 
5209 	return 0;
5210 }
5211 
5212 static ssize_t
5213 tracing_mark_write(struct file *filp, const char __user *ubuf,
5214 					size_t cnt, loff_t *fpos)
5215 {
5216 	unsigned long addr = (unsigned long)ubuf;
5217 	struct trace_array *tr = filp->private_data;
5218 	struct ring_buffer_event *event;
5219 	struct ring_buffer *buffer;
5220 	struct print_entry *entry;
5221 	unsigned long irq_flags;
5222 	struct page *pages[2];
5223 	void *map_page[2];
5224 	int nr_pages = 1;
5225 	ssize_t written;
5226 	int offset;
5227 	int size;
5228 	int len;
5229 	int ret;
5230 	int i;
5231 
5232 	if (tracing_disabled)
5233 		return -EINVAL;
5234 
5235 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5236 		return -EINVAL;
5237 
5238 	if (cnt > TRACE_BUF_SIZE)
5239 		cnt = TRACE_BUF_SIZE;
5240 
5241 	/*
5242 	 * Userspace is injecting traces into the kernel trace buffer.
5243 	 * We want to be as non-intrusive as possible.
5244 	 * To do so, we do not want to allocate any special buffers
5245 	 * or take any locks, but instead write the userspace data
5246 	 * straight into the ring buffer.
5247 	 *
5248 	 * First we need to pin the userspace buffer into memory,
5249 	 * which it most likely is, because the caller just referenced it.
5250 	 * But there's no guarantee that it is. By using get_user_pages_fast()
5251 	 * and kmap_atomic/kunmap_atomic() we can get access to the
5252 	 * pages directly. We then write the data directly into the
5253 	 * ring buffer.
5254 	 */
5255 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5256 
5257 	/* check if we cross pages */
5258 	if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5259 		nr_pages = 2;
5260 
5261 	offset = addr & (PAGE_SIZE - 1);
5262 	addr &= PAGE_MASK;
5263 
5264 	ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5265 	if (ret < nr_pages) {
5266 		while (--ret >= 0)
5267 			put_page(pages[ret]);
5268 		written = -EFAULT;
5269 		goto out;
5270 	}
5271 
5272 	for (i = 0; i < nr_pages; i++)
5273 		map_page[i] = kmap_atomic(pages[i]);
5274 
5275 	local_save_flags(irq_flags);
5276 	size = sizeof(*entry) + cnt + 2; /* possible \n added */
5277 	buffer = tr->trace_buffer.buffer;
5278 	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5279 					  irq_flags, preempt_count());
5280 	if (!event) {
5281 		/* Ring buffer disabled, return as if not open for write */
5282 		written = -EBADF;
5283 		goto out_unlock;
5284 	}
5285 
5286 	entry = ring_buffer_event_data(event);
5287 	entry->ip = _THIS_IP_;
5288 
5289 	if (nr_pages == 2) {
5290 		len = PAGE_SIZE - offset;
5291 		memcpy(&entry->buf, map_page[0] + offset, len);
5292 		memcpy(&entry->buf[len], map_page[1], cnt - len);
5293 	} else
5294 		memcpy(&entry->buf, map_page[0] + offset, cnt);
5295 
5296 	if (entry->buf[cnt - 1] != '\n') {
5297 		entry->buf[cnt] = '\n';
5298 		entry->buf[cnt + 1] = '\0';
5299 	} else
5300 		entry->buf[cnt] = '\0';
5301 
5302 	__buffer_unlock_commit(buffer, event);
5303 
5304 	written = cnt;
5305 
5306 	*fpos += written;
5307 
5308  out_unlock:
5309 	for (i = nr_pages - 1; i >= 0; i--) {
5310 		kunmap_atomic(map_page[i]);
5311 		put_page(pages[i]);
5312 	}
5313  out:
5314 	return written;
5315 }
5316 
5317 static int tracing_clock_show(struct seq_file *m, void *v)
5318 {
5319 	struct trace_array *tr = m->private;
5320 	int i;
5321 
5322 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5323 		seq_printf(m,
5324 			"%s%s%s%s", i ? " " : "",
5325 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5326 			i == tr->clock_id ? "]" : "");
5327 	seq_putc(m, '\n');
5328 
5329 	return 0;
5330 }
5331 
5332 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5333 {
5334 	int i;
5335 
5336 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5337 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
5338 			break;
5339 	}
5340 	if (i == ARRAY_SIZE(trace_clocks))
5341 		return -EINVAL;
5342 
5343 	mutex_lock(&trace_types_lock);
5344 
5345 	tr->clock_id = i;
5346 
5347 	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5348 
5349 	/*
5350 	 * New clock may not be consistent with the previous clock.
5351 	 * Reset the buffer so that it doesn't have incomparable timestamps.
5352 	 */
5353 	tracing_reset_online_cpus(&tr->trace_buffer);
5354 
5355 #ifdef CONFIG_TRACER_MAX_TRACE
5356 	if (tr->max_buffer.buffer)
5357 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5358 	tracing_reset_online_cpus(&tr->max_buffer);
5359 #endif
5360 
5361 	mutex_unlock(&trace_types_lock);
5362 
5363 	return 0;
5364 }
5365 
5366 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5367 				   size_t cnt, loff_t *fpos)
5368 {
5369 	struct seq_file *m = filp->private_data;
5370 	struct trace_array *tr = m->private;
5371 	char buf[64];
5372 	const char *clockstr;
5373 	int ret;
5374 
5375 	if (cnt >= sizeof(buf))
5376 		return -EINVAL;
5377 
5378 	if (copy_from_user(&buf, ubuf, cnt))
5379 		return -EFAULT;
5380 
5381 	buf[cnt] = 0;
5382 
5383 	clockstr = strstrip(buf);
5384 
5385 	ret = tracing_set_clock(tr, clockstr);
5386 	if (ret)
5387 		return ret;
5388 
5389 	*fpos += cnt;
5390 
5391 	return cnt;
5392 }
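/*
 * Usage sketch (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *	echo mono > trace_clock
 *
 * switches the clock. Note that tracing_set_clock() resets the ring
 * buffers, so any existing trace data is discarded on a clock change.
 */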
5393 
5394 static int tracing_clock_open(struct inode *inode, struct file *file)
5395 {
5396 	struct trace_array *tr = inode->i_private;
5397 	int ret;
5398 
5399 	if (tracing_disabled)
5400 		return -ENODEV;
5401 
5402 	if (trace_array_get(tr))
5403 		return -ENODEV;
5404 
5405 	ret = single_open(file, tracing_clock_show, inode->i_private);
5406 	if (ret < 0)
5407 		trace_array_put(tr);
5408 
5409 	return ret;
5410 }
5411 
5412 struct ftrace_buffer_info {
5413 	struct trace_iterator	iter;
5414 	void			*spare;
5415 	unsigned int		read;
5416 };
5417 
5418 #ifdef CONFIG_TRACER_SNAPSHOT
5419 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5420 {
5421 	struct trace_array *tr = inode->i_private;
5422 	struct trace_iterator *iter;
5423 	struct seq_file *m;
5424 	int ret = 0;
5425 
5426 	if (trace_array_get(tr) < 0)
5427 		return -ENODEV;
5428 
5429 	if (file->f_mode & FMODE_READ) {
5430 		iter = __tracing_open(inode, file, true);
5431 		if (IS_ERR(iter))
5432 			ret = PTR_ERR(iter);
5433 	} else {
5434 		/* Writes still need the seq_file to hold the private data */
5435 		ret = -ENOMEM;
5436 		m = kzalloc(sizeof(*m), GFP_KERNEL);
5437 		if (!m)
5438 			goto out;
5439 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5440 		if (!iter) {
5441 			kfree(m);
5442 			goto out;
5443 		}
5444 		ret = 0;
5445 
5446 		iter->tr = tr;
5447 		iter->trace_buffer = &tr->max_buffer;
5448 		iter->cpu_file = tracing_get_cpu(inode);
5449 		m->private = iter;
5450 		file->private_data = m;
5451 	}
5452 out:
5453 	if (ret < 0)
5454 		trace_array_put(tr);
5455 
5456 	return ret;
5457 }
5458 
5459 static ssize_t
5460 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5461 		       loff_t *ppos)
5462 {
5463 	struct seq_file *m = filp->private_data;
5464 	struct trace_iterator *iter = m->private;
5465 	struct trace_array *tr = iter->tr;
5466 	unsigned long val;
5467 	int ret;
5468 
5469 	ret = tracing_update_buffers();
5470 	if (ret < 0)
5471 		return ret;
5472 
5473 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5474 	if (ret)
5475 		return ret;
5476 
5477 	mutex_lock(&trace_types_lock);
5478 
5479 	if (tr->current_trace->use_max_tr) {
5480 		ret = -EBUSY;
5481 		goto out;
5482 	}
5483 
5484 	switch (val) {
5485 	case 0:
5486 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5487 			ret = -EINVAL;
5488 			break;
5489 		}
5490 		if (tr->allocated_snapshot)
5491 			free_snapshot(tr);
5492 		break;
5493 	case 1:
5494 /* Only allow per-cpu swap if the ring buffer supports it */
5495 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5496 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5497 			ret = -EINVAL;
5498 			break;
5499 		}
5500 #endif
5501 		if (!tr->allocated_snapshot)
5502 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
5503 				&tr->trace_buffer, iter->cpu_file);
5504 		else
5505 			ret = alloc_snapshot(tr);
5506 
5507 		if (ret < 0)
5508 			break;
5509 
5510 		local_irq_disable();
5511 		/* Now, we're going to swap */
5512 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5513 			update_max_tr(tr, current, smp_processor_id());
5514 		else
5515 			update_max_tr_single(tr, current, iter->cpu_file);
5516 		local_irq_enable();
5517 		break;
5518 	default:
5519 		if (tr->allocated_snapshot) {
5520 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5521 				tracing_reset_online_cpus(&tr->max_buffer);
5522 			else
5523 				tracing_reset(&tr->max_buffer, iter->cpu_file);
5524 		}
5525 		break;
5526 	}
5527 
5528 	if (ret >= 0) {
5529 		*ppos += cnt;
5530 		ret = cnt;
5531 	}
5532 out:
5533 	mutex_unlock(&trace_types_lock);
5534 	return ret;
5535 }
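/*
 * To summarize the switch above, a value written to the "snapshot" file
 * means:
 *	0 - free the snapshot buffer (all-CPUs file only)
 *	1 - allocate the snapshot buffer if needed and take a snapshot
 *	any other value - clear the snapshot buffer contents without freeing it
 */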
5536 
5537 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5538 {
5539 	struct seq_file *m = file->private_data;
5540 	int ret;
5541 
5542 	ret = tracing_release(inode, file);
5543 
5544 	if (file->f_mode & FMODE_READ)
5545 		return ret;
5546 
5547 	/* If write only, the seq_file is just a stub */
5548 	if (m)
5549 		kfree(m->private);
5550 	kfree(m);
5551 
5552 	return 0;
5553 }
5554 
5555 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5556 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5557 				    size_t count, loff_t *ppos);
5558 static int tracing_buffers_release(struct inode *inode, struct file *file);
5559 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5560 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5561 
5562 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5563 {
5564 	struct ftrace_buffer_info *info;
5565 	int ret;
5566 
5567 	ret = tracing_buffers_open(inode, filp);
5568 	if (ret < 0)
5569 		return ret;
5570 
5571 	info = filp->private_data;
5572 
5573 	if (info->iter.trace->use_max_tr) {
5574 		tracing_buffers_release(inode, filp);
5575 		return -EBUSY;
5576 	}
5577 
5578 	info->iter.snapshot = true;
5579 	info->iter.trace_buffer = &info->iter.tr->max_buffer;
5580 
5581 	return ret;
5582 }
5583 
5584 #endif /* CONFIG_TRACER_SNAPSHOT */
5585 
5586 
5587 static const struct file_operations tracing_thresh_fops = {
5588 	.open		= tracing_open_generic,
5589 	.read		= tracing_thresh_read,
5590 	.write		= tracing_thresh_write,
5591 	.llseek		= generic_file_llseek,
5592 };
5593 
5594 #ifdef CONFIG_TRACER_MAX_TRACE
5595 static const struct file_operations tracing_max_lat_fops = {
5596 	.open		= tracing_open_generic,
5597 	.read		= tracing_max_lat_read,
5598 	.write		= tracing_max_lat_write,
5599 	.llseek		= generic_file_llseek,
5600 };
5601 #endif
5602 
5603 static const struct file_operations set_tracer_fops = {
5604 	.open		= tracing_open_generic,
5605 	.read		= tracing_set_trace_read,
5606 	.write		= tracing_set_trace_write,
5607 	.llseek		= generic_file_llseek,
5608 };
5609 
5610 static const struct file_operations tracing_pipe_fops = {
5611 	.open		= tracing_open_pipe,
5612 	.poll		= tracing_poll_pipe,
5613 	.read		= tracing_read_pipe,
5614 	.splice_read	= tracing_splice_read_pipe,
5615 	.release	= tracing_release_pipe,
5616 	.llseek		= no_llseek,
5617 };
5618 
5619 static const struct file_operations tracing_entries_fops = {
5620 	.open		= tracing_open_generic_tr,
5621 	.read		= tracing_entries_read,
5622 	.write		= tracing_entries_write,
5623 	.llseek		= generic_file_llseek,
5624 	.release	= tracing_release_generic_tr,
5625 };
5626 
5627 static const struct file_operations tracing_total_entries_fops = {
5628 	.open		= tracing_open_generic_tr,
5629 	.read		= tracing_total_entries_read,
5630 	.llseek		= generic_file_llseek,
5631 	.release	= tracing_release_generic_tr,
5632 };
5633 
5634 static const struct file_operations tracing_free_buffer_fops = {
5635 	.open		= tracing_open_generic_tr,
5636 	.write		= tracing_free_buffer_write,
5637 	.release	= tracing_free_buffer_release,
5638 };
5639 
5640 static const struct file_operations tracing_mark_fops = {
5641 	.open		= tracing_open_generic_tr,
5642 	.write		= tracing_mark_write,
5643 	.llseek		= generic_file_llseek,
5644 	.release	= tracing_release_generic_tr,
5645 };
5646 
5647 static const struct file_operations trace_clock_fops = {
5648 	.open		= tracing_clock_open,
5649 	.read		= seq_read,
5650 	.llseek		= seq_lseek,
5651 	.release	= tracing_single_release_tr,
5652 	.write		= tracing_clock_write,
5653 };
5654 
5655 #ifdef CONFIG_TRACER_SNAPSHOT
5656 static const struct file_operations snapshot_fops = {
5657 	.open		= tracing_snapshot_open,
5658 	.read		= seq_read,
5659 	.write		= tracing_snapshot_write,
5660 	.llseek		= tracing_lseek,
5661 	.release	= tracing_snapshot_release,
5662 };
5663 
5664 static const struct file_operations snapshot_raw_fops = {
5665 	.open		= snapshot_raw_open,
5666 	.read		= tracing_buffers_read,
5667 	.release	= tracing_buffers_release,
5668 	.splice_read	= tracing_buffers_splice_read,
5669 	.llseek		= no_llseek,
5670 };
5671 
5672 #endif /* CONFIG_TRACER_SNAPSHOT */
5673 
5674 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5675 {
5676 	struct trace_array *tr = inode->i_private;
5677 	struct ftrace_buffer_info *info;
5678 	int ret;
5679 
5680 	if (tracing_disabled)
5681 		return -ENODEV;
5682 
5683 	if (trace_array_get(tr) < 0)
5684 		return -ENODEV;
5685 
5686 	info = kzalloc(sizeof(*info), GFP_KERNEL);
5687 	if (!info) {
5688 		trace_array_put(tr);
5689 		return -ENOMEM;
5690 	}
5691 
5692 	mutex_lock(&trace_types_lock);
5693 
5694 	info->iter.tr		= tr;
5695 	info->iter.cpu_file	= tracing_get_cpu(inode);
5696 	info->iter.trace	= tr->current_trace;
5697 	info->iter.trace_buffer = &tr->trace_buffer;
5698 	info->spare		= NULL;
5699 	/* Force reading ring buffer for first read */
5700 	info->read		= (unsigned int)-1;
5701 
5702 	filp->private_data = info;
5703 
5704 	tr->current_trace->ref++;
5705 
5706 	mutex_unlock(&trace_types_lock);
5707 
5708 	ret = nonseekable_open(inode, filp);
5709 	if (ret < 0)
5710 		trace_array_put(tr);
5711 
5712 	return ret;
5713 }
5714 
5715 static unsigned int
5716 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5717 {
5718 	struct ftrace_buffer_info *info = filp->private_data;
5719 	struct trace_iterator *iter = &info->iter;
5720 
5721 	return trace_poll(iter, filp, poll_table);
5722 }
5723 
5724 static ssize_t
5725 tracing_buffers_read(struct file *filp, char __user *ubuf,
5726 		     size_t count, loff_t *ppos)
5727 {
5728 	struct ftrace_buffer_info *info = filp->private_data;
5729 	struct trace_iterator *iter = &info->iter;
5730 	ssize_t ret;
5731 	ssize_t size;
5732 
5733 	if (!count)
5734 		return 0;
5735 
5736 #ifdef CONFIG_TRACER_MAX_TRACE
5737 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5738 		return -EBUSY;
5739 #endif
5740 
5741 	if (!info->spare)
5742 		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5743 							  iter->cpu_file);
5744 	if (!info->spare)
5745 		return -ENOMEM;
5746 
5747 	/* Do we have previous read data to read? */
5748 	if (info->read < PAGE_SIZE)
5749 		goto read;
5750 
5751  again:
5752 	trace_access_lock(iter->cpu_file);
5753 	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5754 				    &info->spare,
5755 				    count,
5756 				    iter->cpu_file, 0);
5757 	trace_access_unlock(iter->cpu_file);
5758 
5759 	if (ret < 0) {
5760 		if (trace_empty(iter)) {
5761 			if ((filp->f_flags & O_NONBLOCK))
5762 				return -EAGAIN;
5763 
5764 			ret = wait_on_pipe(iter, false);
5765 			if (ret)
5766 				return ret;
5767 
5768 			goto again;
5769 		}
5770 		return 0;
5771 	}
5772 
5773 	info->read = 0;
5774  read:
5775 	size = PAGE_SIZE - info->read;
5776 	if (size > count)
5777 		size = count;
5778 
5779 	ret = copy_to_user(ubuf, info->spare + info->read, size);
5780 	if (ret == size)
5781 		return -EFAULT;
5782 
5783 	size -= ret;
5784 
5785 	*ppos += size;
5786 	info->read += size;
5787 
5788 	return size;
5789 }
5790 
5791 static int tracing_buffers_release(struct inode *inode, struct file *file)
5792 {
5793 	struct ftrace_buffer_info *info = file->private_data;
5794 	struct trace_iterator *iter = &info->iter;
5795 
5796 	mutex_lock(&trace_types_lock);
5797 
5798 	iter->tr->current_trace->ref--;
5799 
5800 	__trace_array_put(iter->tr);
5801 
5802 	if (info->spare)
5803 		ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5804 	kfree(info);
5805 
5806 	mutex_unlock(&trace_types_lock);
5807 
5808 	return 0;
5809 }
5810 
5811 struct buffer_ref {
5812 	struct ring_buffer	*buffer;
5813 	void			*page;
5814 	int			ref;
5815 };
5816 
5817 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5818 				    struct pipe_buffer *buf)
5819 {
5820 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5821 
5822 	if (--ref->ref)
5823 		return;
5824 
5825 	ring_buffer_free_read_page(ref->buffer, ref->page);
5826 	kfree(ref);
5827 	buf->private = 0;
5828 }
5829 
5830 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5831 				struct pipe_buffer *buf)
5832 {
5833 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5834 
5835 	if (ref->ref > INT_MAX/2)
5836 		return false;
5837 
5838 	ref->ref++;
5839 	return true;
5840 }
5841 
5842 /* Pipe buffer operations for a buffer. */
5843 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5844 	.can_merge		= 0,
5845 	.confirm		= generic_pipe_buf_confirm,
5846 	.release		= buffer_pipe_buf_release,
5847 	.steal			= generic_pipe_buf_steal,
5848 	.get			= buffer_pipe_buf_get,
5849 };
5850 
5851 /*
5852  * Callback from splice_to_pipe(); releases pages remaining in the
5853  * spd in case we errored out while filling the pipe.
5854  */
5855 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5856 {
5857 	struct buffer_ref *ref =
5858 		(struct buffer_ref *)spd->partial[i].private;
5859 
5860 	if (--ref->ref)
5861 		return;
5862 
5863 	ring_buffer_free_read_page(ref->buffer, ref->page);
5864 	kfree(ref);
5865 	spd->partial[i].private = 0;
5866 }
5867 
5868 static ssize_t
5869 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5870 			    struct pipe_inode_info *pipe, size_t len,
5871 			    unsigned int flags)
5872 {
5873 	struct ftrace_buffer_info *info = file->private_data;
5874 	struct trace_iterator *iter = &info->iter;
5875 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5876 	struct page *pages_def[PIPE_DEF_BUFFERS];
5877 	struct splice_pipe_desc spd = {
5878 		.pages		= pages_def,
5879 		.partial	= partial_def,
5880 		.nr_pages_max	= PIPE_DEF_BUFFERS,
5881 		.flags		= flags,
5882 		.ops		= &buffer_pipe_buf_ops,
5883 		.spd_release	= buffer_spd_release,
5884 	};
5885 	struct buffer_ref *ref;
5886 	int entries, i;
5887 	ssize_t ret = 0;
5888 
5889 #ifdef CONFIG_TRACER_MAX_TRACE
5890 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5891 		return -EBUSY;
5892 #endif
5893 
5894 	if (*ppos & (PAGE_SIZE - 1))
5895 		return -EINVAL;
5896 
5897 	if (len & (PAGE_SIZE - 1)) {
5898 		if (len < PAGE_SIZE)
5899 			return -EINVAL;
5900 		len &= PAGE_MASK;
5901 	}
5902 
5903 	if (splice_grow_spd(pipe, &spd))
5904 		return -ENOMEM;
5905 
5906  again:
5907 	trace_access_lock(iter->cpu_file);
5908 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5909 
5910 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5911 		struct page *page;
5912 		int r;
5913 
5914 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5915 		if (!ref) {
5916 			ret = -ENOMEM;
5917 			break;
5918 		}
5919 
5920 		ref->ref = 1;
5921 		ref->buffer = iter->trace_buffer->buffer;
5922 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5923 		if (!ref->page) {
5924 			ret = -ENOMEM;
5925 			kfree(ref);
5926 			break;
5927 		}
5928 
5929 		r = ring_buffer_read_page(ref->buffer, &ref->page,
5930 					  len, iter->cpu_file, 1);
5931 		if (r < 0) {
5932 			ring_buffer_free_read_page(ref->buffer, ref->page);
5933 			kfree(ref);
5934 			break;
5935 		}
5936 
5937 		page = virt_to_page(ref->page);
5938 
5939 		spd.pages[i] = page;
5940 		spd.partial[i].len = PAGE_SIZE;
5941 		spd.partial[i].offset = 0;
5942 		spd.partial[i].private = (unsigned long)ref;
5943 		spd.nr_pages++;
5944 		*ppos += PAGE_SIZE;
5945 
5946 		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5947 	}
5948 
5949 	trace_access_unlock(iter->cpu_file);
5950 	spd.nr_pages = i;
5951 
5952 	/* did we read anything? */
5953 	if (!spd.nr_pages) {
5954 		if (ret)
5955 			goto out;
5956 
5957 		ret = -EAGAIN;
5958 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
5959 			goto out;
5960 
5961 		ret = wait_on_pipe(iter, true);
5962 		if (ret)
5963 			goto out;
5964 
5965 		goto again;
5966 	}
5967 
5968 	ret = splice_to_pipe(pipe, &spd);
5969 out:
5970 	splice_shrink_spd(&spd);
5971 
5972 	return ret;
5973 }
5974 
5975 static const struct file_operations tracing_buffers_fops = {
5976 	.open		= tracing_buffers_open,
5977 	.read		= tracing_buffers_read,
5978 	.poll		= tracing_buffers_poll,
5979 	.release	= tracing_buffers_release,
5980 	.splice_read	= tracing_buffers_splice_read,
5981 	.llseek		= no_llseek,
5982 };
5983 
5984 static ssize_t
5985 tracing_stats_read(struct file *filp, char __user *ubuf,
5986 		   size_t count, loff_t *ppos)
5987 {
5988 	struct inode *inode = file_inode(filp);
5989 	struct trace_array *tr = inode->i_private;
5990 	struct trace_buffer *trace_buf = &tr->trace_buffer;
5991 	int cpu = tracing_get_cpu(inode);
5992 	struct trace_seq *s;
5993 	unsigned long cnt;
5994 	unsigned long long t;
5995 	unsigned long usec_rem;
5996 
5997 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5998 	if (!s)
5999 		return -ENOMEM;
6000 
6001 	trace_seq_init(s);
6002 
6003 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6004 	trace_seq_printf(s, "entries: %ld\n", cnt);
6005 
6006 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6007 	trace_seq_printf(s, "overrun: %ld\n", cnt);
6008 
6009 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6010 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6011 
6012 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6013 	trace_seq_printf(s, "bytes: %ld\n", cnt);
6014 
6015 	if (trace_clocks[tr->clock_id].in_ns) {
6016 		/* local or global for trace_clock */
6017 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6018 		usec_rem = do_div(t, USEC_PER_SEC);
6019 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6020 								t, usec_rem);
6021 
6022 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6023 		usec_rem = do_div(t, USEC_PER_SEC);
6024 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6025 	} else {
6026 		/* counter or tsc mode for trace_clock */
6027 		trace_seq_printf(s, "oldest event ts: %llu\n",
6028 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6029 
6030 		trace_seq_printf(s, "now ts: %llu\n",
6031 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
6032 	}
6033 
6034 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6035 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
6036 
6037 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6038 	trace_seq_printf(s, "read events: %ld\n", cnt);
6039 
6040 	count = simple_read_from_buffer(ubuf, count, ppos,
6041 					s->buffer, trace_seq_used(s));
6042 
6043 	kfree(s);
6044 
6045 	return count;
6046 }
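/*
 * The per-cpu "stats" file generated above reports, one field per line:
 * entries, overrun, commit overrun, bytes, oldest event ts, now ts,
 * dropped events and read events for that CPU's ring buffer.
 */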
6047 
6048 static const struct file_operations tracing_stats_fops = {
6049 	.open		= tracing_open_generic_tr,
6050 	.read		= tracing_stats_read,
6051 	.llseek		= generic_file_llseek,
6052 	.release	= tracing_release_generic_tr,
6053 };
6054 
6055 #ifdef CONFIG_DYNAMIC_FTRACE
6056 
6057 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6058 {
6059 	return 0;
6060 }
6061 
6062 static ssize_t
6063 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6064 		  size_t cnt, loff_t *ppos)
6065 {
6066 	static char ftrace_dyn_info_buffer[1024];
6067 	static DEFINE_MUTEX(dyn_info_mutex);
6068 	unsigned long *p = filp->private_data;
6069 	char *buf = ftrace_dyn_info_buffer;
6070 	int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6071 	int r;
6072 
6073 	mutex_lock(&dyn_info_mutex);
6074 	r = sprintf(buf, "%ld ", *p);
6075 
6076 	r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6077 	buf[r++] = '\n';
6078 
6079 	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6080 
6081 	mutex_unlock(&dyn_info_mutex);
6082 
6083 	return r;
6084 }
6085 
6086 static const struct file_operations tracing_dyn_info_fops = {
6087 	.open		= tracing_open_generic,
6088 	.read		= tracing_read_dyn_info,
6089 	.llseek		= generic_file_llseek,
6090 };
6091 #endif /* CONFIG_DYNAMIC_FTRACE */
6092 
6093 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6094 static void
6095 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6096 {
6097 	tracing_snapshot();
6098 }
6099 
6100 static void
6101 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6102 {
6103 	unsigned long *count = (long *)data;
6104 
6105 	if (!*count)
6106 		return;
6107 
6108 	if (*count != -1)
6109 		(*count)--;
6110 
6111 	tracing_snapshot();
6112 }
6113 
6114 static int
6115 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6116 		      struct ftrace_probe_ops *ops, void *data)
6117 {
6118 	long count = (long)data;
6119 
6120 	seq_printf(m, "%ps:", (void *)ip);
6121 
6122 	seq_puts(m, "snapshot");
6123 
6124 	if (count == -1)
6125 		seq_puts(m, ":unlimited\n");
6126 	else
6127 		seq_printf(m, ":count=%ld\n", count);
6128 
6129 	return 0;
6130 }
6131 
6132 static struct ftrace_probe_ops snapshot_probe_ops = {
6133 	.func			= ftrace_snapshot,
6134 	.print			= ftrace_snapshot_print,
6135 };
6136 
6137 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6138 	.func			= ftrace_count_snapshot,
6139 	.print			= ftrace_snapshot_print,
6140 };
6141 
6142 static int
6143 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6144 			       char *glob, char *cmd, char *param, int enable)
6145 {
6146 	struct ftrace_probe_ops *ops;
6147 	void *count = (void *)-1;
6148 	char *number;
6149 	int ret;
6150 
6151 	/* hash funcs only work with set_ftrace_filter */
6152 	if (!enable)
6153 		return -EINVAL;
6154 
6155 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6156 
6157 	if (glob[0] == '!') {
6158 		unregister_ftrace_function_probe_func(glob+1, ops);
6159 		return 0;
6160 	}
6161 
6162 	if (!param)
6163 		goto out_reg;
6164 
6165 	number = strsep(&param, ":");
6166 
6167 	if (!strlen(number))
6168 		goto out_reg;
6169 
6170 	/*
6171 	 * We use the callback data field (which is a pointer)
6172 	 * as our counter.
6173 	 */
6174 	ret = kstrtoul(number, 0, (unsigned long *)&count);
6175 	if (ret)
6176 		return ret;
6177 
6178  out_reg:
6179 	ret = alloc_snapshot(&global_trace);
6180 	if (ret < 0)
6181 		goto out;
6182 
6183 	ret = register_ftrace_function_probe(glob, ops, count);
6184 
6185  out:
6186 	return ret < 0 ? ret : 0;
6187 }
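/*
 * Usage sketch for the "snapshot" ftrace command registered below
 * (the function name is chosen purely for illustration):
 *
 *	echo 'schedule:snapshot:5' > set_ftrace_filter
 *
 * takes a snapshot on each of the first 5 hits of schedule(), and
 * prefixing the glob with '!' unregisters the probe again.
 */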
6188 
6189 static struct ftrace_func_command ftrace_snapshot_cmd = {
6190 	.name			= "snapshot",
6191 	.func			= ftrace_trace_snapshot_callback,
6192 };
6193 
6194 static __init int register_snapshot_cmd(void)
6195 {
6196 	return register_ftrace_command(&ftrace_snapshot_cmd);
6197 }
6198 #else
6199 static inline __init int register_snapshot_cmd(void) { return 0; }
6200 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6201 
6202 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6203 {
6204 	if (WARN_ON(!tr->dir))
6205 		return ERR_PTR(-ENODEV);
6206 
6207 	/* Top directory uses NULL as the parent */
6208 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6209 		return NULL;
6210 
6211 	/* All sub buffers have a descriptor */
6212 	return tr->dir;
6213 }
6214 
6215 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6216 {
6217 	struct dentry *d_tracer;
6218 
6219 	if (tr->percpu_dir)
6220 		return tr->percpu_dir;
6221 
6222 	d_tracer = tracing_get_dentry(tr);
6223 	if (IS_ERR(d_tracer))
6224 		return NULL;
6225 
6226 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6227 
6228 	WARN_ONCE(!tr->percpu_dir,
6229 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6230 
6231 	return tr->percpu_dir;
6232 }
6233 
6234 static struct dentry *
6235 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6236 		      void *data, long cpu, const struct file_operations *fops)
6237 {
6238 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6239 
6240 	if (ret) /* See tracing_get_cpu() */
6241 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
6242 	return ret;
6243 }
6244 
6245 static void
6246 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6247 {
6248 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6249 	struct dentry *d_cpu;
6250 	char cpu_dir[30]; /* 30 characters should be more than enough */
6251 
6252 	if (!d_percpu)
6253 		return;
6254 
6255 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
6256 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6257 	if (!d_cpu) {
6258 		pr_warning("Could not create tracefs '%s' entry\n", cpu_dir);
6259 		return;
6260 	}
6261 
6262 	/* per cpu trace_pipe */
6263 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6264 				tr, cpu, &tracing_pipe_fops);
6265 
6266 	/* per cpu trace */
6267 	trace_create_cpu_file("trace", 0644, d_cpu,
6268 				tr, cpu, &tracing_fops);
6269 
6270 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6271 				tr, cpu, &tracing_buffers_fops);
6272 
6273 	trace_create_cpu_file("stats", 0444, d_cpu,
6274 				tr, cpu, &tracing_stats_fops);
6275 
6276 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6277 				tr, cpu, &tracing_entries_fops);
6278 
6279 #ifdef CONFIG_TRACER_SNAPSHOT
6280 	trace_create_cpu_file("snapshot", 0644, d_cpu,
6281 				tr, cpu, &snapshot_fops);
6282 
6283 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6284 				tr, cpu, &snapshot_raw_fops);
6285 #endif
6286 }
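/*
 * The result is a per_cpu/cpuN directory for each tracing CPU containing
 * trace, trace_pipe, trace_pipe_raw, stats, buffer_size_kb and, with
 * CONFIG_TRACER_SNAPSHOT, snapshot and snapshot_raw.
 */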
6287 
6288 #ifdef CONFIG_FTRACE_SELFTEST
6289 /* Let selftest have access to static functions in this file */
6290 #include "trace_selftest.c"
6291 #endif
6292 
6293 static ssize_t
6294 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6295 			loff_t *ppos)
6296 {
6297 	struct trace_option_dentry *topt = filp->private_data;
6298 	char *buf;
6299 
6300 	if (topt->flags->val & topt->opt->bit)
6301 		buf = "1\n";
6302 	else
6303 		buf = "0\n";
6304 
6305 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6306 }
6307 
6308 static ssize_t
6309 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6310 			 loff_t *ppos)
6311 {
6312 	struct trace_option_dentry *topt = filp->private_data;
6313 	unsigned long val;
6314 	int ret;
6315 
6316 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6317 	if (ret)
6318 		return ret;
6319 
6320 	if (val != 0 && val != 1)
6321 		return -EINVAL;
6322 
6323 	if (!!(topt->flags->val & topt->opt->bit) != val) {
6324 		mutex_lock(&trace_types_lock);
6325 		ret = __set_tracer_option(topt->tr, topt->flags,
6326 					  topt->opt, !val);
6327 		mutex_unlock(&trace_types_lock);
6328 		if (ret)
6329 			return ret;
6330 	}
6331 
6332 	*ppos += cnt;
6333 
6334 	return cnt;
6335 }
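/*
 * Each tracer-specific option appears as a file under options/; writing
 * "1" enables and "0" disables just that option via __set_tracer_option()
 * above.
 */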
6336 
6337 
6338 static const struct file_operations trace_options_fops = {
6339 	.open = tracing_open_generic,
6340 	.read = trace_options_read,
6341 	.write = trace_options_write,
6342 	.llseek	= generic_file_llseek,
6343 };
6344 
6345 /*
6346  * In order to pass in both the trace_array descriptor as well as the index
6347  * to the flag that the trace option file represents, the trace_array
6348  * has a character array of trace_flags_index[], which holds the index
6349  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6350  * The address of this character array is passed to the flag option file
6351  * read/write callbacks.
6352  *
6353  * In order to extract both the index and the trace_array descriptor,
6354  * get_tr_index() uses the following algorithm.
6355  *
6356  *   idx = *ptr;
6357  *
6358  * The pointer itself holds the address of its slot in the index array
6359  * (remember, index[1] == 1), so dereferencing it yields the index.
6360  *
6361  * Then, to get the trace_array descriptor, we subtract that index from
6362  * the pointer, which brings us back to the start of the index array.
6363  *
6364  *   ptr - idx == &index[0]
6365  *
6366  * Then a simple container_of() from that pointer gets us to the
6367  * trace_array descriptor.
6368  */
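/*
 * For example, if data points at &tr->trace_flags_index[3], then
 * *data == 3 and data - 3 == tr->trace_flags_index, so container_of()
 * on that address recovers tr.
 */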
6369 static void get_tr_index(void *data, struct trace_array **ptr,
6370 			 unsigned int *pindex)
6371 {
6372 	*pindex = *(unsigned char *)data;
6373 
6374 	*ptr = container_of(data - *pindex, struct trace_array,
6375 			    trace_flags_index);
6376 }
6377 
6378 static ssize_t
6379 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6380 			loff_t *ppos)
6381 {
6382 	void *tr_index = filp->private_data;
6383 	struct trace_array *tr;
6384 	unsigned int index;
6385 	char *buf;
6386 
6387 	get_tr_index(tr_index, &tr, &index);
6388 
6389 	if (tr->trace_flags & (1 << index))
6390 		buf = "1\n";
6391 	else
6392 		buf = "0\n";
6393 
6394 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6395 }
6396 
6397 static ssize_t
6398 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6399 			 loff_t *ppos)
6400 {
6401 	void *tr_index = filp->private_data;
6402 	struct trace_array *tr;
6403 	unsigned int index;
6404 	unsigned long val;
6405 	int ret;
6406 
6407 	get_tr_index(tr_index, &tr, &index);
6408 
6409 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6410 	if (ret)
6411 		return ret;
6412 
6413 	if (val != 0 && val != 1)
6414 		return -EINVAL;
6415 
6416 	mutex_lock(&trace_types_lock);
6417 	ret = set_tracer_flag(tr, 1 << index, val);
6418 	mutex_unlock(&trace_types_lock);
6419 
6420 	if (ret < 0)
6421 		return ret;
6422 
6423 	*ppos += cnt;
6424 
6425 	return cnt;
6426 }
6427 
6428 static const struct file_operations trace_options_core_fops = {
6429 	.open = tracing_open_generic,
6430 	.read = trace_options_core_read,
6431 	.write = trace_options_core_write,
6432 	.llseek = generic_file_llseek,
6433 };
6434 
6435 struct dentry *trace_create_file(const char *name,
6436 				 umode_t mode,
6437 				 struct dentry *parent,
6438 				 void *data,
6439 				 const struct file_operations *fops)
6440 {
6441 	struct dentry *ret;
6442 
6443 	ret = tracefs_create_file(name, mode, parent, data, fops);
6444 	if (!ret)
6445 		pr_warning("Could not create tracefs '%s' entry\n", name);
6446 
6447 	return ret;
6448 }
6449 
6450 
6451 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6452 {
6453 	struct dentry *d_tracer;
6454 
6455 	if (tr->options)
6456 		return tr->options;
6457 
6458 	d_tracer = tracing_get_dentry(tr);
6459 	if (IS_ERR(d_tracer))
6460 		return NULL;
6461 
6462 	tr->options = tracefs_create_dir("options", d_tracer);
6463 	if (!tr->options) {
6464 		pr_warning("Could not create tracefs directory 'options'\n");
6465 		return NULL;
6466 	}
6467 
6468 	return tr->options;
6469 }
6470 
6471 static void
6472 create_trace_option_file(struct trace_array *tr,
6473 			 struct trace_option_dentry *topt,
6474 			 struct tracer_flags *flags,
6475 			 struct tracer_opt *opt)
6476 {
6477 	struct dentry *t_options;
6478 
6479 	t_options = trace_options_init_dentry(tr);
6480 	if (!t_options)
6481 		return;
6482 
6483 	topt->flags = flags;
6484 	topt->opt = opt;
6485 	topt->tr = tr;
6486 
6487 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6488 				    &trace_options_fops);
6489 
6490 }
6491 
6492 static void
6493 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6494 {
6495 	struct trace_option_dentry *topts;
6496 	struct trace_options *tr_topts;
6497 	struct tracer_flags *flags;
6498 	struct tracer_opt *opts;
6499 	int cnt;
6500 	int i;
6501 
6502 	if (!tracer)
6503 		return;
6504 
6505 	flags = tracer->flags;
6506 
6507 	if (!flags || !flags->opts)
6508 		return;
6509 
6510 	/*
6511 	 * If this is an instance, only create flags for tracers
6512 	 * the instance may have.
6513 	 */
6514 	if (!trace_ok_for_array(tracer, tr))
6515 		return;
6516 
6517 	for (i = 0; i < tr->nr_topts; i++) {
6518 		/*
6519 		 * Check if these flags have already been added.
6520 		 * Some tracers share flags.
6521 		 */
6522 		if (tr->topts[i].tracer->flags == tracer->flags)
6523 			return;
6524 	}
6525 
6526 	opts = flags->opts;
6527 
6528 	for (cnt = 0; opts[cnt].name; cnt++)
6529 		;
6530 
6531 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6532 	if (!topts)
6533 		return;
6534 
6535 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6536 			    GFP_KERNEL);
6537 	if (!tr_topts) {
6538 		kfree(topts);
6539 		return;
6540 	}
6541 
6542 	tr->topts = tr_topts;
6543 	tr->topts[tr->nr_topts].tracer = tracer;
6544 	tr->topts[tr->nr_topts].topts = topts;
6545 	tr->nr_topts++;
6546 
6547 	for (cnt = 0; opts[cnt].name; cnt++) {
6548 		create_trace_option_file(tr, &topts[cnt], flags,
6549 					 &opts[cnt]);
6550 		WARN_ONCE(topts[cnt].entry == NULL,
6551 			  "Failed to create trace option: %s",
6552 			  opts[cnt].name);
6553 	}
6554 }
6555 
6556 static struct dentry *
6557 create_trace_option_core_file(struct trace_array *tr,
6558 			      const char *option, long index)
6559 {
6560 	struct dentry *t_options;
6561 
6562 	t_options = trace_options_init_dentry(tr);
6563 	if (!t_options)
6564 		return NULL;
6565 
6566 	return trace_create_file(option, 0644, t_options,
6567 				 (void *)&tr->trace_flags_index[index],
6568 				 &trace_options_core_fops);
6569 }
6570 
6571 static void create_trace_options_dir(struct trace_array *tr)
6572 {
6573 	struct dentry *t_options;
6574 	bool top_level = tr == &global_trace;
6575 	int i;
6576 
6577 	t_options = trace_options_init_dentry(tr);
6578 	if (!t_options)
6579 		return;
6580 
6581 	for (i = 0; trace_options[i]; i++) {
6582 		if (top_level ||
6583 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6584 			create_trace_option_core_file(tr, trace_options[i], i);
6585 	}
6586 }
6587 
6588 static ssize_t
6589 rb_simple_read(struct file *filp, char __user *ubuf,
6590 	       size_t cnt, loff_t *ppos)
6591 {
6592 	struct trace_array *tr = filp->private_data;
6593 	char buf[64];
6594 	int r;
6595 
6596 	r = tracer_tracing_is_on(tr);
6597 	r = sprintf(buf, "%d\n", r);
6598 
6599 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6600 }
6601 
6602 static ssize_t
6603 rb_simple_write(struct file *filp, const char __user *ubuf,
6604 		size_t cnt, loff_t *ppos)
6605 {
6606 	struct trace_array *tr = filp->private_data;
6607 	struct ring_buffer *buffer = tr->trace_buffer.buffer;
6608 	unsigned long val;
6609 	int ret;
6610 
6611 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6612 	if (ret)
6613 		return ret;
6614 
6615 	if (buffer) {
6616 		mutex_lock(&trace_types_lock);
6617 		if (!!val == tracer_tracing_is_on(tr)) {
6618 			val = 0; /* do nothing */
6619 		} else if (val) {
6620 			tracer_tracing_on(tr);
6621 			if (tr->current_trace->start)
6622 				tr->current_trace->start(tr);
6623 		} else {
6624 			tracer_tracing_off(tr);
6625 			if (tr->current_trace->stop)
6626 				tr->current_trace->stop(tr);
6627 		}
6628 		mutex_unlock(&trace_types_lock);
6629 	}
6630 
6631 	(*ppos)++;
6632 
6633 	return cnt;
6634 }
6635 
6636 static const struct file_operations rb_simple_fops = {
6637 	.open		= tracing_open_generic_tr,
6638 	.read		= rb_simple_read,
6639 	.write		= rb_simple_write,
6640 	.release	= tracing_release_generic_tr,
6641 	.llseek		= default_llseek,
6642 };
6643 
6644 struct dentry *trace_instance_dir;
6645 
6646 static void
6647 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6648 
6649 static int
6650 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6651 {
6652 	enum ring_buffer_flags rb_flags;
6653 
6654 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6655 
6656 	buf->tr = tr;
6657 
6658 	buf->buffer = ring_buffer_alloc(size, rb_flags);
6659 	if (!buf->buffer)
6660 		return -ENOMEM;
6661 
6662 	buf->data = alloc_percpu(struct trace_array_cpu);
6663 	if (!buf->data) {
6664 		ring_buffer_free(buf->buffer);
6665 		buf->buffer = NULL;
6666 		return -ENOMEM;
6667 	}
6668 
6669 	/* Allocate the first page for all buffers */
6670 	set_buffer_entries(&tr->trace_buffer,
6671 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
6672 
6673 	return 0;
6674 }
6675 
6676 static int allocate_trace_buffers(struct trace_array *tr, int size)
6677 {
6678 	int ret;
6679 
6680 	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6681 	if (ret)
6682 		return ret;
6683 
6684 #ifdef CONFIG_TRACER_MAX_TRACE
6685 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
6686 				    allocate_snapshot ? size : 1);
6687 	if (WARN_ON(ret)) {
6688 		ring_buffer_free(tr->trace_buffer.buffer);
6689 		tr->trace_buffer.buffer = NULL;
6690 		free_percpu(tr->trace_buffer.data);
6691 		tr->trace_buffer.data = NULL;
6692 		return -ENOMEM;
6693 	}
6694 	tr->allocated_snapshot = allocate_snapshot;
6695 
6696 	/*
6697 	 * Only the top level trace array gets its snapshot allocated
6698 	 * from the kernel command line.
6699 	 */
6700 	allocate_snapshot = false;
6701 #endif
6702 
6703 	/*
6704 	 * Because of some magic with the way alloc_percpu() works on
6705 	 * x86_64, we need to synchronize the pgd of all the tables,
6706 	 * otherwise the trace events that happen in x86_64 page fault
6707 	 * handlers cannot cope with the chance that alloc_percpu()'d
6708 	 * memory is touched from within the page fault trace event.
6709 	 * We also need to audit all other alloc_percpu() and vmalloc()
6710 	 * calls in tracing, because something might get triggered within a
6711 	 * page fault trace event!
6712 	 */
6713 	vmalloc_sync_mappings();
6714 
6715 	return 0;
6716 }
6717 
6718 static void free_trace_buffer(struct trace_buffer *buf)
6719 {
6720 	if (buf->buffer) {
6721 		ring_buffer_free(buf->buffer);
6722 		buf->buffer = NULL;
6723 		free_percpu(buf->data);
6724 		buf->data = NULL;
6725 	}
6726 }
6727 
6728 static void free_trace_buffers(struct trace_array *tr)
6729 {
6730 	if (!tr)
6731 		return;
6732 
6733 	free_trace_buffer(&tr->trace_buffer);
6734 
6735 #ifdef CONFIG_TRACER_MAX_TRACE
6736 	free_trace_buffer(&tr->max_buffer);
6737 #endif
6738 }
6739 
6740 static void init_trace_flags_index(struct trace_array *tr)
6741 {
6742 	int i;
6743 
6744 	/* Used by the trace options files */
6745 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
6746 		tr->trace_flags_index[i] = i;
6747 }
6748 
6749 static void __update_tracer_options(struct trace_array *tr)
6750 {
6751 	struct tracer *t;
6752 
6753 	for (t = trace_types; t; t = t->next)
6754 		add_tracer_options(tr, t);
6755 }
6756 
6757 static void update_tracer_options(struct trace_array *tr)
6758 {
6759 	mutex_lock(&trace_types_lock);
6760 	__update_tracer_options(tr);
6761 	mutex_unlock(&trace_types_lock);
6762 }
6763 
6764 static int instance_mkdir(const char *name)
6765 {
6766 	struct trace_array *tr;
6767 	int ret;
6768 
6769 	mutex_lock(&trace_types_lock);
6770 
6771 	ret = -EEXIST;
6772 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6773 		if (tr->name && strcmp(tr->name, name) == 0)
6774 			goto out_unlock;
6775 	}
6776 
6777 	ret = -ENOMEM;
6778 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6779 	if (!tr)
6780 		goto out_unlock;
6781 
6782 	tr->name = kstrdup(name, GFP_KERNEL);
6783 	if (!tr->name)
6784 		goto out_free_tr;
6785 
6786 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6787 		goto out_free_tr;
6788 
6789 	tr->trace_flags = global_trace.trace_flags;
6790 
6791 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6792 
6793 	raw_spin_lock_init(&tr->start_lock);
6794 
6795 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6796 
6797 	tr->current_trace = &nop_trace;
6798 
6799 	INIT_LIST_HEAD(&tr->systems);
6800 	INIT_LIST_HEAD(&tr->events);
6801 
6802 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6803 		goto out_free_tr;
6804 
6805 	tr->dir = tracefs_create_dir(name, trace_instance_dir);
6806 	if (!tr->dir)
6807 		goto out_free_tr;
6808 
6809 	ret = event_trace_add_tracer(tr->dir, tr);
6810 	if (ret) {
6811 		tracefs_remove_recursive(tr->dir);
6812 		goto out_free_tr;
6813 	}
6814 
6815 	init_tracer_tracefs(tr, tr->dir);
6816 	init_trace_flags_index(tr);
6817 	__update_tracer_options(tr);
6818 
6819 	list_add(&tr->list, &ftrace_trace_arrays);
6820 
6821 	mutex_unlock(&trace_types_lock);
6822 
6823 	return 0;
6824 
6825  out_free_tr:
6826 	free_trace_buffers(tr);
6827 	free_cpumask_var(tr->tracing_cpumask);
6828 	kfree(tr->name);
6829 	kfree(tr);
6830 
6831  out_unlock:
6832 	mutex_unlock(&trace_types_lock);
6833 
6834 	return ret;
6835 
6836 }
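/*
 * Usage sketch: creating a directory under the tracefs "instances"
 * directory (e.g. mkdir instances/foo) ends up here and gives the new
 * instance its own buffers, events and option files; removing the
 * directory goes through instance_rmdir() below, provided nothing still
 * holds a reference to the trace array.
 */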
6837 
6838 static int instance_rmdir(const char *name)
6839 {
6840 	struct trace_array *tr;
6841 	int found = 0;
6842 	int ret;
6843 	int i;
6844 
6845 	mutex_lock(&trace_types_lock);
6846 
6847 	ret = -ENODEV;
6848 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6849 		if (tr->name && strcmp(tr->name, name) == 0) {
6850 			found = 1;
6851 			break;
6852 		}
6853 	}
6854 	if (!found)
6855 		goto out_unlock;
6856 
6857 	ret = -EBUSY;
6858 	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
6859 		goto out_unlock;
6860 
6861 	list_del(&tr->list);
6862 
6863 	tracing_set_nop(tr);
6864 	event_trace_del_tracer(tr);
6865 	ftrace_destroy_function_files(tr);
6866 	tracefs_remove_recursive(tr->dir);
6867 	free_trace_buffers(tr);
6868 
6869 	for (i = 0; i < tr->nr_topts; i++) {
6870 		kfree(tr->topts[i].topts);
6871 	}
6872 	kfree(tr->topts);
6873 
6874 	free_cpumask_var(tr->tracing_cpumask);
6875 	kfree(tr->name);
6876 	kfree(tr);
6877 
6878 	ret = 0;
6879 
6880  out_unlock:
6881 	mutex_unlock(&trace_types_lock);
6882 
6883 	return ret;
6884 }
6885 
6886 static __init void create_trace_instances(struct dentry *d_tracer)
6887 {
6888 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
6889 							 instance_mkdir,
6890 							 instance_rmdir);
6891 	if (WARN_ON(!trace_instance_dir))
6892 		return;
6893 }
6894 
6895 static void
6896 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
6897 {
6898 	int cpu;
6899 
6900 	trace_create_file("available_tracers", 0444, d_tracer,
6901 			tr, &show_traces_fops);
6902 
6903 	trace_create_file("current_tracer", 0644, d_tracer,
6904 			tr, &set_tracer_fops);
6905 
6906 	trace_create_file("tracing_cpumask", 0644, d_tracer,
6907 			  tr, &tracing_cpumask_fops);
6908 
6909 	trace_create_file("trace_options", 0644, d_tracer,
6910 			  tr, &tracing_iter_fops);
6911 
6912 	trace_create_file("trace", 0644, d_tracer,
6913 			  tr, &tracing_fops);
6914 
6915 	trace_create_file("trace_pipe", 0444, d_tracer,
6916 			  tr, &tracing_pipe_fops);
6917 
6918 	trace_create_file("buffer_size_kb", 0644, d_tracer,
6919 			  tr, &tracing_entries_fops);
6920 
6921 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6922 			  tr, &tracing_total_entries_fops);
6923 
6924 	trace_create_file("free_buffer", 0200, d_tracer,
6925 			  tr, &tracing_free_buffer_fops);
6926 
6927 	trace_create_file("trace_marker", 0220, d_tracer,
6928 			  tr, &tracing_mark_fops);
6929 
6930 	trace_create_file("saved_tgids", 0444, d_tracer,
6931 			  tr, &tracing_saved_tgids_fops);
6932 
6933 	trace_create_file("trace_clock", 0644, d_tracer, tr,
6934 			  &trace_clock_fops);
6935 
6936 	trace_create_file("tracing_on", 0644, d_tracer,
6937 			  tr, &rb_simple_fops);
6938 
6939 	create_trace_options_dir(tr);
6940 
6941 #ifdef CONFIG_TRACER_MAX_TRACE
6942 	trace_create_file("tracing_max_latency", 0644, d_tracer,
6943 			&tr->max_latency, &tracing_max_lat_fops);
6944 #endif
6945 
6946 	if (ftrace_create_function_files(tr, d_tracer))
6947 		WARN(1, "Could not allocate function filter files");
6948 
6949 #ifdef CONFIG_TRACER_SNAPSHOT
6950 	trace_create_file("snapshot", 0644, d_tracer,
6951 			  tr, &snapshot_fops);
6952 #endif
6953 
6954 	for_each_tracing_cpu(cpu)
6955 		tracing_init_tracefs_percpu(tr, cpu);
6956 
6957 }
6958 
6959 static struct vfsmount *trace_automount(void *ignore)
6960 {
6961 	struct vfsmount *mnt;
6962 	struct file_system_type *type;
6963 
6964 	/*
6965 	 * To maintain backward compatibility for tools that mount
6966 	 * debugfs to get to the tracing facility, tracefs is automatically
6967 	 * mounted to the debugfs/tracing directory.
6968 	 */
6969 	type = get_fs_type("tracefs");
6970 	if (!type)
6971 		return NULL;
6972 	mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
6973 	put_filesystem(type);
6974 	if (IS_ERR(mnt))
6975 		return NULL;
6976 	mntget(mnt);
6977 
6978 	return mnt;
6979 }
6980 
6981 /**
6982  * tracing_init_dentry - initialize top level trace array
6983  *
6984  * This is called when creating files or directories in the tracing
6985  * directory. It is called via fs_initcall() by any of the boot up code
6986  * and expects to return the dentry of the top level tracing directory.
6987  */
6988 struct dentry *tracing_init_dentry(void)
6989 {
6990 	struct trace_array *tr = &global_trace;
6991 
6992 	/* The top level trace array uses NULL as parent */
6993 	if (tr->dir)
6994 		return NULL;
6995 
6996 	if (WARN_ON(!tracefs_initialized()) ||
6997 		(IS_ENABLED(CONFIG_DEBUG_FS) &&
6998 		 WARN_ON(!debugfs_initialized())))
6999 		return ERR_PTR(-ENODEV);
7000 
7001 	/*
7002 	 * As there may still be users that expect the tracing
7003 	 * files to exist in debugfs/tracing, we must automount
7004 	 * the tracefs file system there, so older tools still
7005 	 * work with the newer kernel.
7006 	 */
7007 	tr->dir = debugfs_create_automount("tracing", NULL,
7008 					   trace_automount, NULL);
7009 	if (!tr->dir) {
7010 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
7011 		return ERR_PTR(-ENOMEM);
7012 	}
7013 
7014 	return NULL;
7015 }
7016 
7017 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7018 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7019 
7020 static void __init trace_enum_init(void)
7021 {
7022 	int len;
7023 
7024 	len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7025 	trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7026 }
7027 
7028 #ifdef CONFIG_MODULES
7029 static void trace_module_add_enums(struct module *mod)
7030 {
7031 	if (!mod->num_trace_enums)
7032 		return;
7033 
7034 	/*
7035 	 * Modules with bad taint do not have events created, do
7036 	 * not bother with enums either.
7037 	 */
7038 	if (trace_module_has_bad_taint(mod))
7039 		return;
7040 
7041 	trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7042 }
7043 
7044 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7045 static void trace_module_remove_enums(struct module *mod)
7046 {
7047 	union trace_enum_map_item *map;
7048 	union trace_enum_map_item **last = &trace_enum_maps;
7049 
7050 	if (!mod->num_trace_enums)
7051 		return;
7052 
7053 	mutex_lock(&trace_enum_mutex);
7054 
7055 	map = trace_enum_maps;
7056 
7057 	while (map) {
7058 		if (map->head.mod == mod)
7059 			break;
7060 		map = trace_enum_jmp_to_tail(map);
7061 		last = &map->tail.next;
7062 		map = map->tail.next;
7063 	}
7064 	if (!map)
7065 		goto out;
7066 
7067 	*last = trace_enum_jmp_to_tail(map)->tail.next;
7068 	kfree(map);
7069  out:
7070 	mutex_unlock(&trace_enum_mutex);
7071 }
7072 #else
7073 static inline void trace_module_remove_enums(struct module *mod) { }
7074 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7075 
7076 static int trace_module_notify(struct notifier_block *self,
7077 			       unsigned long val, void *data)
7078 {
7079 	struct module *mod = data;
7080 
7081 	switch (val) {
7082 	case MODULE_STATE_COMING:
7083 		trace_module_add_enums(mod);
7084 		break;
7085 	case MODULE_STATE_GOING:
7086 		trace_module_remove_enums(mod);
7087 		break;
7088 	}
7089 
7090 	return 0;
7091 }
7092 
7093 static struct notifier_block trace_module_nb = {
7094 	.notifier_call = trace_module_notify,
7095 	.priority = 0,
7096 };
7097 #endif /* CONFIG_MODULES */
7098 
7099 static __init int tracer_init_tracefs(void)
7100 {
7101 	struct dentry *d_tracer;
7102 
7103 	trace_access_lock_init();
7104 
7105 	d_tracer = tracing_init_dentry();
7106 	if (IS_ERR(d_tracer))
7107 		return 0;
7108 
7109 	init_tracer_tracefs(&global_trace, d_tracer);
7110 
7111 	trace_create_file("tracing_thresh", 0644, d_tracer,
7112 			&global_trace, &tracing_thresh_fops);
7113 
7114 	trace_create_file("README", 0444, d_tracer,
7115 			NULL, &tracing_readme_fops);
7116 
7117 	trace_create_file("saved_cmdlines", 0444, d_tracer,
7118 			NULL, &tracing_saved_cmdlines_fops);
7119 
7120 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7121 			  NULL, &tracing_saved_cmdlines_size_fops);
7122 
7123 	trace_enum_init();
7124 
7125 	trace_create_enum_file(d_tracer);
7126 
7127 #ifdef CONFIG_MODULES
7128 	register_module_notifier(&trace_module_nb);
7129 #endif
7130 
7131 #ifdef CONFIG_DYNAMIC_FTRACE
7132 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7133 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7134 #endif
7135 
7136 	create_trace_instances(d_tracer);
7137 
7138 	update_tracer_options(&global_trace);
7139 
7140 	return 0;
7141 }
7142 
7143 static int trace_panic_handler(struct notifier_block *this,
7144 			       unsigned long event, void *unused)
7145 {
7146 	if (ftrace_dump_on_oops)
7147 		ftrace_dump(ftrace_dump_on_oops);
7148 	return NOTIFY_OK;
7149 }
7150 
7151 static struct notifier_block trace_panic_notifier = {
7152 	.notifier_call  = trace_panic_handler,
7153 	.next           = NULL,
7154 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
7155 };
7156 
7157 static int trace_die_handler(struct notifier_block *self,
7158 			     unsigned long val,
7159 			     void *data)
7160 {
7161 	switch (val) {
7162 	case DIE_OOPS:
7163 		if (ftrace_dump_on_oops)
7164 			ftrace_dump(ftrace_dump_on_oops);
7165 		break;
7166 	default:
7167 		break;
7168 	}
7169 	return NOTIFY_OK;
7170 }
7171 
7172 static struct notifier_block trace_die_notifier = {
7173 	.notifier_call = trace_die_handler,
7174 	.priority = 200
7175 };
7176 
7177 /*
7178  * printk is set to a max of 1024; we really don't need it that big.
7179  * Nothing should be printing 1000 characters anyway.
7180  */
7181 #define TRACE_MAX_PRINT		1000
7182 
7183 /*
7184  * Define here KERN_TRACE so that we have one place to modify
7185  * it if we decide to change what log level the ftrace dump
7186  * should be at.
7187  */
7188 #define KERN_TRACE		KERN_EMERG
7189 
7190 void
7191 trace_printk_seq(struct trace_seq *s)
7192 {
7193 	/* Probably should print a warning here. */
7194 	if (s->seq.len >= TRACE_MAX_PRINT)
7195 		s->seq.len = TRACE_MAX_PRINT;
7196 
7197 	/*
7198 	 * More paranoid code. Although the buffer size is set to
7199 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7200 	 * an extra layer of protection.
7201 	 */
7202 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7203 		s->seq.len = s->seq.size - 1;
7204 
7205 	/* Should be NUL terminated, but we are paranoid. */
7206 	s->buffer[s->seq.len] = 0;
7207 
7208 	printk(KERN_TRACE "%s", s->buffer);
7209 
7210 	trace_seq_init(s);
7211 }
7212 
7213 void trace_init_global_iter(struct trace_iterator *iter)
7214 {
7215 	iter->tr = &global_trace;
7216 	iter->trace = iter->tr->current_trace;
7217 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
7218 	iter->trace_buffer = &global_trace.trace_buffer;
7219 
7220 	if (iter->trace && iter->trace->open)
7221 		iter->trace->open(iter);
7222 
7223 	/* Annotate start of buffers if we had overruns */
7224 	if (ring_buffer_overruns(iter->trace_buffer->buffer))
7225 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
7226 
7227 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
7228 	if (trace_clocks[iter->tr->clock_id].in_ns)
7229 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7230 }
7231 
7232 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7233 {
7234 	/* use static because iter can be a bit big for the stack */
7235 	static struct trace_iterator iter;
7236 	static atomic_t dump_running;
7237 	struct trace_array *tr = &global_trace;
7238 	unsigned int old_userobj;
7239 	unsigned long flags;
7240 	int cnt = 0, cpu;
7241 
7242 	/* Only allow one dump user at a time. */
7243 	if (atomic_inc_return(&dump_running) != 1) {
7244 		atomic_dec(&dump_running);
7245 		return;
7246 	}
7247 
7248 	/*
7249 	 * Always turn off tracing when we dump.
7250 	 * We don't need to show trace output of what happens
7251 	 * between multiple crashes.
7252 	 *
7253 	 * If the user does a sysrq-z, then they can re-enable
7254 	 * tracing with echo 1 > tracing_on.
7255 	 */
7256 	tracing_off();
7257 
7258 	local_irq_save(flags);
7259 
7260 	/* Simulate the iterator */
7261 	trace_init_global_iter(&iter);
7262 
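	/*
	 * Bump each per-CPU "disabled" count so that no new events are
	 * recorded into the buffers while we walk and print them.
	 */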
	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
	}

	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

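	/*
	 * DUMP_ALL dumps the buffers of every CPU; DUMP_ORIG dumps only
	 * the buffer of the CPU we are currently running on.
	 */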
	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUs to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We read all that we can,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
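		/*
		 * Printing to a slow console can take a while; poke the
		 * NMI watchdog so it does not fire mid-dump.
		 */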
		touch_nmi_watchdog();

		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

 out_enable:
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);

__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

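	/*
	 * tracing_buffer_mask covers every possible CPU (each gets a
	 * buffer); tracing_cpumask selects which CPUs are traced and
	 * can be changed later through the tracing_cpumask file.
	 */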
	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/* Used for event triggers */
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_free_cpumask;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
		WARN_ON(1);
		goto out_free_savedcmd;
	}

	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warning("Trace clock %s not defined, going back to default\n",
				   trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}

void __init trace_init(void)
{
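	/*
	 * The tp_printk boot parameter routes trace events to printk;
	 * it needs an iterator allocated before any event can fire.
	 */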
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		if (WARN_ON(!tracepoint_print_iter))
			tracepoint_printk = 0;
	}
	tracer_alloc_buffers();
	trace_event_init();
}

__init static int clear_boot_tracer(void)
{
	/*
	 * The boot-up tracer name lives in an init section, and this
	 * function runs as a late initcall. If the boot tracer was never
	 * registered, clear the pointer so that a later registration
	 * cannot access a buffer that is about to be freed.
	 */
	if (!default_bootup_tracer)
		return 0;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;

	return 0;
}

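/*
 * The tracefs files are created at fs_initcall time; clear_boot_tracer
 * runs as a late initcall, before the init sections (which hold the
 * boot-up tracer name) are freed.
 */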
fs_initcall(tracer_init_tracefs);
late_initcall(clear_boot_tracer);
