• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/kmemleak.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/fsnotify.h>
51 #include <linux/irq_work.h>
52 #include <linux/workqueue.h>
53 #include <trace/hooks/ftrace_dump.h>
54 
55 #include "trace.h"
56 #include "trace_output.h"
57 
58 /*
59  * On boot up, the ring buffer is set to the minimum size, so that
60  * we do not waste memory on systems that are not using tracing.
61  */
62 bool ring_buffer_expanded;
63 
64 /*
65  * We need to change this state when a selftest is running.
66  * A selftest will lurk into the ring-buffer to count the
67  * entries inserted during the selftest although some concurrent
68  * insertions into the ring-buffer such as trace_printk could occurred
69  * at the same time, giving false positive or negative results.
70  */
71 static bool __read_mostly tracing_selftest_running;
72 
73 /*
74  * If boot-time tracing including tracers/events via kernel cmdline
75  * is running, we do not want to run SELFTEST.
76  */
77 bool __read_mostly tracing_selftest_disabled;
78 
#ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * Turn off the ftrace startup selftests, logging the reason once.
 * Calls after the first disable are silently ignored.
 */
void __init disable_tracing_selftest(const char *reason)
{
	if (tracing_selftest_disabled)
		return;

	tracing_selftest_disabled = true;
	pr_info("Ftrace startup test is disabled due to %s\n", reason);
}
#endif
88 
89 /* Pipe tracepoints to printk */
90 struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
94 
95 /* For tracers that don't implement custom flags */
96 static struct tracer_opt dummy_tracer_opt[] = {
97 	{ }
98 };
99 
/*
 * Fallback set_flag callback for tracers that do not implement custom
 * option flags (paired with dummy_tracer_opt above): accept any flag
 * change and report success.
 */
static int
dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
	return 0;
}
105 
106 /*
107  * To prevent the comm cache from being overwritten when no
108  * tracing is active, only save the comm when a trace event
109  * occurred.
110  */
111 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
112 
113 /*
114  * Kill all tracing for good (never come back).
115  * It is initialized to 1 but will turn to zero if the initialization
116  * of the tracer is successful. But that is the only place that sets
117  * this back to zero.
118  */
119 static int tracing_disabled = 1;
120 
121 cpumask_var_t __read_mostly	tracing_buffer_mask;
122 
123 /*
124  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
125  *
126  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
127  * is set, then ftrace_dump is called. This will output the contents
128  * of the ftrace buffers to the console.  This is very useful for
129  * capturing traces that lead to crashes and outputing it to a
130  * serial console.
131  *
132  * It is default off, but you can enable it with either specifying
133  * "ftrace_dump_on_oops" in the kernel command line, or setting
134  * /proc/sys/kernel/ftrace_dump_on_oops
135  * Set 1 if you want to dump buffers of all CPUs
136  * Set 2 if you want to dump the buffer of the CPU that triggered oops
137  */
138 
139 enum ftrace_dump_mode ftrace_dump_on_oops;
140 
141 /* When set, tracing will stop when a WARN*() is hit */
142 int __disable_trace_on_warning;
143 
144 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
145 /* Map of enums to their values, for "eval_map" file */
146 struct trace_eval_map_head {
147 	struct module			*mod;
148 	unsigned long			length;
149 };
150 
151 union trace_eval_map_item;
152 
153 struct trace_eval_map_tail {
154 	/*
155 	 * "end" is first and points to NULL as it must be different
156 	 * than "mod" or "eval_string"
157 	 */
158 	union trace_eval_map_item	*next;
159 	const char			*end;	/* points to NULL */
160 };
161 
162 static DEFINE_MUTEX(trace_eval_mutex);
163 
164 /*
165  * The trace_eval_maps are saved in an array with two extra elements,
166  * one at the beginning, and one at the end. The beginning item contains
167  * the count of the saved maps (head.length), and the module they
168  * belong to if not built in (head.mod). The ending item contains a
169  * pointer to the next array of saved eval_map items.
170  */
171 union trace_eval_map_item {
172 	struct trace_eval_map		map;
173 	struct trace_eval_map_head	head;
174 	struct trace_eval_map_tail	tail;
175 };
176 
177 static union trace_eval_map_item *trace_eval_maps;
178 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
179 
180 int tracing_set_tracer(struct trace_array *tr, const char *buf);
181 static void ftrace_trace_userstack(struct trace_array *tr,
182 				   struct trace_buffer *buffer,
183 				   unsigned int trace_ctx);
184 
185 #define MAX_TRACER_SIZE		100
186 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
187 static char *default_bootup_tracer;
188 
189 static bool allocate_snapshot;
190 
set_cmdline_ftrace(char * str)191 static int __init set_cmdline_ftrace(char *str)
192 {
193 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
194 	default_bootup_tracer = bootup_tracer_buf;
195 	/* We are using ftrace early, expand it */
196 	ring_buffer_expanded = true;
197 	return 1;
198 }
199 __setup("ftrace=", set_cmdline_ftrace);
200 
set_ftrace_dump_on_oops(char * str)201 static int __init set_ftrace_dump_on_oops(char *str)
202 {
203 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
204 		ftrace_dump_on_oops = DUMP_ALL;
205 		return 1;
206 	}
207 
208 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
209 		ftrace_dump_on_oops = DUMP_ORIG;
210                 return 1;
211         }
212 
213         return 0;
214 }
215 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
216 
stop_trace_on_warning(char * str)217 static int __init stop_trace_on_warning(char *str)
218 {
219 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
220 		__disable_trace_on_warning = 1;
221 	return 1;
222 }
223 __setup("traceoff_on_warning", stop_trace_on_warning);
224 
/*
 * Handle the "alloc_snapshot" boot parameter: request allocation of the
 * snapshot buffer at boot, which also requires the main ring buffer to
 * be expanded to its full size.
 */
static int __init boot_alloc_snapshot(char *str)
{
	allocate_snapshot = true;
	/* We also need the main ring buffer expanded */
	ring_buffer_expanded = true;
	return 1;
}
__setup("alloc_snapshot", boot_alloc_snapshot);
233 
234 
235 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
236 
set_trace_boot_options(char * str)237 static int __init set_trace_boot_options(char *str)
238 {
239 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
240 	return 1;
241 }
242 __setup("trace_options=", set_trace_boot_options);
243 
244 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
245 static char *trace_boot_clock __initdata;
246 
set_trace_boot_clock(char * str)247 static int __init set_trace_boot_clock(char *str)
248 {
249 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
250 	trace_boot_clock = trace_boot_clock_buf;
251 	return 1;
252 }
253 __setup("trace_clock=", set_trace_boot_clock);
254 
set_tracepoint_printk(char * str)255 static int __init set_tracepoint_printk(char *str)
256 {
257 	/* Ignore the "tp_printk_stop_on_boot" param */
258 	if (*str == '_')
259 		return 0;
260 
261 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
262 		tracepoint_printk = 1;
263 	return 1;
264 }
265 __setup("tp_printk", set_tracepoint_printk);
266 
/*
 * Handle the "tp_printk_stop_on_boot" boot parameter: stop piping
 * tracepoints to printk once the system has finished booting.
 */
static int __init set_tracepoint_printk_stop(char *str)
{
	tracepoint_printk_stop_on_boot = true;
	return 1;
}
__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
273 
/*
 * ns2usecs - convert nanoseconds to microseconds, rounded to nearest.
 *
 * Adds half a microsecond before dividing so the result rounds rather
 * than truncates. do_div() is used because native 64-bit division is
 * not available on all 32-bit architectures; it divides @nsec in place.
 */
unsigned long long ns2usecs(u64 nsec)
{
	nsec += 500;
	do_div(nsec, 1000);
	return nsec;
}
280 
281 static void
trace_process_export(struct trace_export * export,struct ring_buffer_event * event,int flag)282 trace_process_export(struct trace_export *export,
283 	       struct ring_buffer_event *event, int flag)
284 {
285 	struct trace_entry *entry;
286 	unsigned int size = 0;
287 
288 	if (export->flags & flag) {
289 		entry = ring_buffer_event_data(event);
290 		size = ring_buffer_event_length(event);
291 		export->write(export, entry, size);
292 	}
293 }
294 
295 static DEFINE_MUTEX(ftrace_export_lock);
296 
297 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
298 
299 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
300 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
301 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
302 
/*
 * Bump the static-branch counters for each export type this exporter
 * handles, so the corresponding trace paths start calling into the
 * export machinery.
 */
static inline void ftrace_exports_enable(struct trace_export *export)
{
	if (export->flags & TRACE_EXPORT_FUNCTION)
		static_branch_inc(&trace_function_exports_enabled);

	if (export->flags & TRACE_EXPORT_EVENT)
		static_branch_inc(&trace_event_exports_enabled);

	if (export->flags & TRACE_EXPORT_MARKER)
		static_branch_inc(&trace_marker_exports_enabled);
}
314 
/*
 * Counterpart of ftrace_exports_enable(): drop the static-branch
 * counters for each export type this exporter handled.
 */
static inline void ftrace_exports_disable(struct trace_export *export)
{
	if (export->flags & TRACE_EXPORT_FUNCTION)
		static_branch_dec(&trace_function_exports_enabled);

	if (export->flags & TRACE_EXPORT_EVENT)
		static_branch_dec(&trace_event_exports_enabled);

	if (export->flags & TRACE_EXPORT_MARKER)
		static_branch_dec(&trace_marker_exports_enabled);
}
326 
ftrace_exports(struct ring_buffer_event * event,int flag)327 static void ftrace_exports(struct ring_buffer_event *event, int flag)
328 {
329 	struct trace_export *export;
330 
331 	preempt_disable_notrace();
332 
333 	export = rcu_dereference_raw_check(ftrace_exports_list);
334 	while (export) {
335 		trace_process_export(export, event, flag);
336 		export = rcu_dereference_raw_check(export->next);
337 	}
338 
339 	preempt_enable_notrace();
340 }
341 
/*
 * Insert @export at the head of the RCU-protected @list. The order of
 * the two assignments below is load-bearing: export->next must be
 * published before the list head points at export.
 */
static inline void
add_trace_export(struct trace_export **list, struct trace_export *export)
{
	rcu_assign_pointer(export->next, *list);
	/*
	 * We are entering export into the list but another
	 * CPU might be walking that list. We need to make sure
	 * the export->next pointer is valid before another CPU sees
	 * the export pointer included into the list.
	 */
	rcu_assign_pointer(*list, export);
}
354 
/*
 * Unlink @export from the RCU-protected @list.
 *
 * Walks the list by pointer-to-pointer so the predecessor's next field
 * (or the list head) can be updated in place. Returns 0 on success or
 * -1 if @export was not found.
 */
static inline int
rm_trace_export(struct trace_export **list, struct trace_export *export)
{
	struct trace_export **p;

	for (p = list; *p != NULL; p = &(*p)->next)
		if (*p == export)
			break;

	/* Reached the end without finding it */
	if (*p != export)
		return -1;

	rcu_assign_pointer(*p, (*p)->next);

	return 0;
}
371 
/*
 * Enable the static branches for @export's event types, then publish
 * it on @list. Caller must hold ftrace_export_lock.
 */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);

	add_trace_export(list, export);
}
379 
/*
 * Remove @export from @list and drop its static-branch counters.
 * Caller must hold ftrace_export_lock. Returns the rm_trace_export()
 * result (0 on success, -1 if not found).
 */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);
	ftrace_exports_disable(export);

	return ret;
}
390 
register_ftrace_export(struct trace_export * export)391 int register_ftrace_export(struct trace_export *export)
392 {
393 	if (WARN_ON_ONCE(!export->write))
394 		return -1;
395 
396 	mutex_lock(&ftrace_export_lock);
397 
398 	add_ftrace_export(&ftrace_exports_list, export);
399 
400 	mutex_unlock(&ftrace_export_lock);
401 
402 	return 0;
403 }
404 EXPORT_SYMBOL_GPL(register_ftrace_export);
405 
unregister_ftrace_export(struct trace_export * export)406 int unregister_ftrace_export(struct trace_export *export)
407 {
408 	int ret;
409 
410 	mutex_lock(&ftrace_export_lock);
411 
412 	ret = rm_ftrace_export(&ftrace_exports_list, export);
413 
414 	mutex_unlock(&ftrace_export_lock);
415 
416 	return ret;
417 }
418 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
419 
420 /* trace_flags holds trace_options default values */
421 #define TRACE_DEFAULT_FLAGS						\
422 	(FUNCTION_DEFAULT_FLAGS |					\
423 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
424 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
425 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
426 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
427 	 TRACE_ITER_HASH_PTR)
428 
429 /* trace_options that are only supported by global_trace */
430 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
431 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
432 
433 /* trace_flags that are default zero for instances */
434 #define ZEROED_TRACE_FLAGS \
435 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
436 
437 /*
438  * The global_trace is the descriptor that holds the top-level tracing
439  * buffers for the live tracing.
440  */
441 static struct trace_array global_trace = {
442 	.trace_flags = TRACE_DEFAULT_FLAGS,
443 };
444 
445 LIST_HEAD(ftrace_trace_arrays);
446 
trace_array_get(struct trace_array * this_tr)447 int trace_array_get(struct trace_array *this_tr)
448 {
449 	struct trace_array *tr;
450 	int ret = -ENODEV;
451 
452 	mutex_lock(&trace_types_lock);
453 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
454 		if (tr == this_tr) {
455 			tr->ref++;
456 			ret = 0;
457 			break;
458 		}
459 	}
460 	mutex_unlock(&trace_types_lock);
461 
462 	return ret;
463 }
464 
/*
 * Drop a reference taken by trace_array_get(). Caller must hold
 * trace_types_lock. Warns if the refcount would underflow.
 */
static void __trace_array_put(struct trace_array *this_tr)
{
	WARN_ON(!this_tr->ref);
	this_tr->ref--;
}
470 
/**
 * trace_array_put - Decrement the reference counter for this trace array.
 * @this_tr : pointer to the trace array (NULL is silently ignored)
 *
 * NOTE: Use this when we no longer need the trace array returned by
 * trace_array_get_by_name(). This ensures the trace array can be later
 * destroyed.
 *
 */
void trace_array_put(struct trace_array *this_tr)
{
	if (!this_tr)
		return;

	/* trace_types_lock protects the refcount against concurrent get/put */
	mutex_lock(&trace_types_lock);
	__trace_array_put(this_tr);
	mutex_unlock(&trace_types_lock);
}
EXPORT_SYMBOL_GPL(trace_array_put);
490 
tracing_check_open_get_tr(struct trace_array * tr)491 int tracing_check_open_get_tr(struct trace_array *tr)
492 {
493 	int ret;
494 
495 	ret = security_locked_down(LOCKDOWN_TRACEFS);
496 	if (ret)
497 		return ret;
498 
499 	if (tracing_disabled)
500 		return -ENODEV;
501 
502 	if (tr && trace_array_get(tr) < 0)
503 		return -ENODEV;
504 
505 	return 0;
506 }
507 
/*
 * If @call has an event filter attached and @rec does not match it,
 * discard the already-reserved ring buffer @event.
 *
 * Returns 1 if the event was discarded, 0 if it should be committed.
 */
int call_filter_check_discard(struct trace_event_call *call, void *rec,
			      struct trace_buffer *buffer,
			      struct ring_buffer_event *event)
{
	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
	    !filter_match_preds(call->filter, rec)) {
		__trace_event_discard_commit(buffer, event);
		return 1;
	}

	return 0;
}
520 
/**
 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
 * @filtered_pids: The list of pids to check
 * @search_pid: The PID to find in @filtered_pids
 *
 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
 */
bool
trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
{
	/* Thin wrapper over the pid_list bitmask lookup */
	return trace_pid_list_is_set(filtered_pids, search_pid);
}
533 
534 /**
535  * trace_ignore_this_task - should a task be ignored for tracing
536  * @filtered_pids: The list of pids to check
537  * @filtered_no_pids: The list of pids not to be traced
538  * @task: The task that should be ignored if not filtered
539  *
540  * Checks if @task should be traced or not from @filtered_pids.
541  * Returns true if @task should *NOT* be traced.
542  * Returns false if @task should be traced.
543  */
544 bool
trace_ignore_this_task(struct trace_pid_list * filtered_pids,struct trace_pid_list * filtered_no_pids,struct task_struct * task)545 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
546 		       struct trace_pid_list *filtered_no_pids,
547 		       struct task_struct *task)
548 {
549 	/*
550 	 * If filtered_no_pids is not empty, and the task's pid is listed
551 	 * in filtered_no_pids, then return true.
552 	 * Otherwise, if filtered_pids is empty, that means we can
553 	 * trace all tasks. If it has content, then only trace pids
554 	 * within filtered_pids.
555 	 */
556 
557 	return (filtered_pids &&
558 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
559 		(filtered_no_pids &&
560 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
561 }
562 
563 /**
564  * trace_filter_add_remove_task - Add or remove a task from a pid_list
565  * @pid_list: The list to modify
566  * @self: The current task for fork or NULL for exit
567  * @task: The task to add or remove
568  *
569  * If adding a task, if @self is defined, the task is only added if @self
570  * is also included in @pid_list. This happens on fork and tasks should
571  * only be added when the parent is listed. If @self is NULL, then the
572  * @task pid will be removed from the list, which would happen on exit
573  * of a task.
574  */
trace_filter_add_remove_task(struct trace_pid_list * pid_list,struct task_struct * self,struct task_struct * task)575 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
576 				  struct task_struct *self,
577 				  struct task_struct *task)
578 {
579 	if (!pid_list)
580 		return;
581 
582 	/* For forks, we only add if the forking task is listed */
583 	if (self) {
584 		if (!trace_find_filtered_pid(pid_list, self->pid))
585 			return;
586 	}
587 
588 	/* "self" is set for forks, and NULL for exits */
589 	if (self)
590 		trace_pid_list_set(pid_list, task->pid);
591 	else
592 		trace_pid_list_clear(pid_list, task->pid);
593 }
594 
/**
 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
 * @pid_list: The pid list to show
 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
 * @pos: The position of the file
 *
 * This is used by the seq_file "next" operation to iterate the pids
 * listed in a trace_pid_list structure.
 *
 * Returns the pid+1 as we want to display pid of zero, but NULL would
 * stop the iteration.
 */
void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
{
	long pid = (unsigned long)v;
	unsigned int next;

	(*pos)++;

	/* pid already is +1 of the actual previous bit */
	if (trace_pid_list_next(pid_list, pid, &next) < 0)
		return NULL;

	pid = next;

	/* Return pid + 1 to allow zero to be represented */
	return (void *)(pid + 1);
}
623 
/**
 * trace_pid_start - Used for seq_file to start reading pid lists
 * @pid_list: The pid list to show
 * @pos: The position of the file
 *
 * This is used by seq_file "start" operation to start the iteration
 * of listing pids.
 *
 * Returns the pid+1 as we want to display pid of zero, but NULL would
 * stop the iteration.
 */
void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
{
	unsigned long pid;
	unsigned int first;
	loff_t l = 0;

	if (trace_pid_list_first(pid_list, &first) < 0)
		return NULL;

	pid = first;

	/*
	 * Advance to position *pos by repeatedly calling trace_pid_next().
	 * pid becomes 0 (NULL) when the list is exhausted, ending the loop.
	 */
	/* Return pid + 1 so that zero can be the exit value */
	for (pid++; pid && l < *pos;
	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
		;
	return (void *)pid;
}
652 
/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Can be directly used by seq_file operations to display the current
 * pid value.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	/* Undo the +1 encoding used by trace_pid_start()/trace_pid_next() */
	unsigned long pid = (unsigned long)v - 1;

	seq_printf(m, "%lu\n", pid);
	return 0;
}
668 
669 /* 128 should be much more than enough */
670 #define PID_BUF_SIZE		127
671 
/*
 * trace_pid_write - parse a user buffer of pid numbers into a new pid_list
 * @filtered_pids: current pid list (may be NULL); its pids are copied
 *                 into the new list first, so a write appends
 * @new_pid_list:  output: the newly built list, or NULL if no pids were
 *                 written (the list was effectively cleared)
 * @ubuf:          user-space buffer of whitespace-separated pid numbers
 * @cnt:           number of bytes available in @ubuf
 *
 * The write is all-or-nothing: a brand new list is always built and the
 * caller's current list is only replaced on success. Returns the number
 * of bytes consumed, or a negative value on error.
 */
int trace_pid_write(struct trace_pid_list *filtered_pids,
		    struct trace_pid_list **new_pid_list,
		    const char __user *ubuf, size_t cnt)
{
	struct trace_pid_list *pid_list;
	struct trace_parser parser;
	unsigned long val;
	int nr_pids = 0;
	ssize_t read = 0;
	ssize_t ret;
	loff_t pos;
	pid_t pid;

	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
		return -ENOMEM;

	/*
	 * Always recreate a new array. The write is an all or nothing
	 * operation. Always create a new array when adding new pids by
	 * the user. If the operation fails, then the current list is
	 * not modified.
	 */
	pid_list = trace_pid_list_alloc();
	if (!pid_list) {
		trace_parser_put(&parser);
		return -ENOMEM;
	}

	if (filtered_pids) {
		/* copy the current bits to the new max */
		ret = trace_pid_list_first(filtered_pids, &pid);
		while (!ret) {
			trace_pid_list_set(pid_list, pid);
			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
			nr_pids++;
		}
	}

	ret = 0;
	while (cnt > 0) {

		pos = 0;

		/* Pull the next whitespace-delimited token from user space */
		ret = trace_get_user(&parser, ubuf, cnt, &pos);
		if (ret < 0)
			break;

		read += ret;
		ubuf += ret;
		cnt -= ret;

		/* Nothing (more) parsed: end of input */
		if (!trace_parser_loaded(&parser))
			break;

		ret = -EINVAL;
		if (kstrtoul(parser.buffer, 0, &val))
			break;

		pid = (pid_t)val;

		if (trace_pid_list_set(pid_list, pid) < 0) {
			ret = -1;
			break;
		}
		nr_pids++;

		trace_parser_clear(&parser);
		ret = 0;
	}
	trace_parser_put(&parser);

	/* Any parse/set failure discards the whole new list */
	if (ret < 0) {
		trace_pid_list_free(pid_list);
		return ret;
	}

	if (!nr_pids) {
		/* Cleared the list of pids */
		trace_pid_list_free(pid_list);
		pid_list = NULL;
	}

	*new_pid_list = pid_list;

	return read;
}
758 
/*
 * Return the current trace timestamp for @buf on @cpu, normalized for
 * the given CPU. Falls back to the local trace clock during early boot
 * before the ring buffer has been allocated.
 */
static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
{
	u64 ts;

	/* Early boot up does not have a buffer yet */
	if (!buf->buffer)
		return trace_clock_local();

	ts = ring_buffer_time_stamp(buf->buffer);
	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);

	return ts;
}
772 
/* Current trace timestamp of the top-level (global) trace buffer on @cpu. */
u64 ftrace_now(int cpu)
{
	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
}
777 
/**
 * tracing_is_enabled - Show if global_trace has been enabled
 *
 * Shows if the global trace has been enabled or not. It uses the
 * mirror flag "buffer_disabled" to be used in fast paths such as for
 * the irqsoff tracer. But it may be inaccurate due to races. If you
 * need to know the accurate state, use tracing_is_on() which is a little
 * slower, but accurate.
 */
int tracing_is_enabled(void)
{
	/*
	 * For quick access (irqsoff uses this in fast path), just
	 * return the mirror variable of the state of the ring buffer.
	 * It's a little racy, but we don't really care.
	 */
	smp_rmb();
	return !global_trace.buffer_disabled;
}
797 
798 /*
799  * trace_buf_size is the size in bytes that is allocated
800  * for a buffer. Note, the number of bytes is always rounded
801  * to page size.
802  *
803  * This number is purposely set to a low number of 16384.
804  * If the dump on oops happens, it will be much appreciated
805  * to not have to wait for all that output. Anyway this can be
806  * boot time and run time configurable.
807  */
808 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
809 
810 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
811 
812 /* trace_types holds a link list of available tracers. */
813 static struct tracer		*trace_types __read_mostly;
814 
815 /*
816  * trace_types_lock is used to protect the trace_types list.
817  */
818 DEFINE_MUTEX(trace_types_lock);
819 
820 /*
821  * serialize the access of the ring buffer
822  *
823  * ring buffer serializes readers, but it is low level protection.
824  * The validity of the events (which returns by ring_buffer_peek() ..etc)
825  * are not protected by ring buffer.
826  *
827  * The content of events may become garbage if we allow other process consumes
828  * these events concurrently:
829  *   A) the page of the consumed events may become a normal page
830  *      (not reader page) in ring buffer, and this page will be rewritten
831  *      by events producer.
832  *   B) The page of the consumed events may become a page for splice_read,
833  *      and this page will be returned to system.
834  *
835  * These primitives allow multi process access to different cpu ring buffer
836  * concurrently.
837  *
838  * These primitives don't distinguish read-only and read-consume access.
839  * Multi read-only access are also serialized.
840  */
841 
842 #ifdef CONFIG_SMP
843 static DECLARE_RWSEM(all_cpu_access_lock);
844 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
845 
/*
 * Serialize ring buffer readers (SMP).
 *
 * A RING_BUFFER_ALL_CPUS reader takes the rwsem for write, excluding
 * everyone else. A per-cpu reader takes the rwsem for read (blocking
 * only whole-buffer readers) plus the per-cpu mutex (blocking other
 * readers of the same cpu buffer). Lock order: rwsem, then mutex.
 */
static inline void trace_access_lock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		/* gain it for accessing the whole ring buffer. */
		down_write(&all_cpu_access_lock);
	} else {
		/* gain it for accessing a cpu ring buffer. */

		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
		down_read(&all_cpu_access_lock);

		/* Secondly block other access to this @cpu ring buffer. */
		mutex_lock(&per_cpu(cpu_access_lock, cpu));
	}
}
861 
/* Release the locks taken by trace_access_lock(), in reverse order. */
static inline void trace_access_unlock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		up_write(&all_cpu_access_lock);
	} else {
		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
		up_read(&all_cpu_access_lock);
	}
}
871 
/* Initialize the per-cpu reader mutexes used by trace_access_lock(). */
static inline void trace_access_lock_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		mutex_init(&per_cpu(cpu_access_lock, cpu));
}
879 
880 #else
881 
882 static DEFINE_MUTEX(access_lock);
883 
/* On UP a single mutex serializes all ring buffer readers; @cpu is unused. */
static inline void trace_access_lock(int cpu)
{
	(void)cpu;
	mutex_lock(&access_lock);
}
889 
/* UP counterpart of trace_access_lock(); @cpu is unused. */
static inline void trace_access_unlock(int cpu)
{
	(void)cpu;
	mutex_unlock(&access_lock);
}
895 
/* Nothing to set up on UP; the static mutex is initialized at build time. */
static inline void trace_access_lock_init(void)
{
}
899 
900 #endif
901 
902 #ifdef CONFIG_STACKTRACE
903 static void __ftrace_trace_stack(struct trace_buffer *buffer,
904 				 unsigned int trace_ctx,
905 				 int skip, struct pt_regs *regs);
906 static inline void ftrace_trace_stack(struct trace_array *tr,
907 				      struct trace_buffer *buffer,
908 				      unsigned int trace_ctx,
909 				      int skip, struct pt_regs *regs);
910 
911 #else
__ftrace_trace_stack(struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)912 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
913 					unsigned int trace_ctx,
914 					int skip, struct pt_regs *regs)
915 {
916 }
ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned long trace_ctx,int skip,struct pt_regs * regs)917 static inline void ftrace_trace_stack(struct trace_array *tr,
918 				      struct trace_buffer *buffer,
919 				      unsigned long trace_ctx,
920 				      int skip, struct pt_regs *regs)
921 {
922 }
923 
924 #endif
925 
/*
 * Fill in the common trace_entry header (type and context flags) of a
 * freshly reserved ring buffer event.
 */
static __always_inline void
trace_event_setup(struct ring_buffer_event *event,
		  int type, unsigned int trace_ctx)
{
	struct trace_entry *ent = ring_buffer_event_data(event);

	tracing_generic_entry_update(ent, type, trace_ctx);
}
934 
935 static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct trace_buffer * buffer,int type,unsigned long len,unsigned int trace_ctx)936 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
937 			  int type,
938 			  unsigned long len,
939 			  unsigned int trace_ctx)
940 {
941 	struct ring_buffer_event *event;
942 
943 	event = ring_buffer_lock_reserve(buffer, len);
944 	if (event != NULL)
945 		trace_event_setup(event, type, trace_ctx);
946 
947 	return event;
948 }
949 
/*
 * Re-enable recording on @tr's ring buffer (if allocated) and clear the
 * mirror "buffer_disabled" flag used by fast paths.
 */
void tracer_tracing_on(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_on(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 0;
	/* Make the flag seen by readers */
	smp_wmb();
}
966 
/**
 * tracing_on - enable tracing buffers
 *
 * This function enables tracing buffers that may have been
 * disabled with tracing_off.
 */
void tracing_on(void)
{
	tracer_tracing_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_on);
978 
979 
/*
 * Commit a reserved @event to @buffer and mark that task info should be
 * saved for this CPU. Events staged in the per-cpu temp buffer are
 * copied into the ring buffer with ring_buffer_write() instead of a
 * plain commit.
 */
static __always_inline void
__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
{
	__this_cpu_write(trace_taskinfo_save, true);

	/* If this is the temp buffer, we need to commit fully */
	if (this_cpu_read(trace_buffered_event) == event) {
		/* Length is in event->array[0] */
		ring_buffer_write(buffer, event->array[0], &event->array[1]);
		/* Release the temp buffer */
		this_cpu_dec(trace_buffered_event_cnt);
	} else
		ring_buffer_unlock_commit(buffer, event);
}
994 
__trace_array_puts(struct trace_array * tr,unsigned long ip,const char * str,int size)995 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
996 		       const char *str, int size)
997 {
998 	struct ring_buffer_event *event;
999 	struct trace_buffer *buffer;
1000 	struct print_entry *entry;
1001 	unsigned int trace_ctx;
1002 	int alloc;
1003 
1004 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1005 		return 0;
1006 
1007 	if (unlikely(tracing_selftest_running || tracing_disabled))
1008 		return 0;
1009 
1010 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1011 
1012 	trace_ctx = tracing_gen_ctx();
1013 	buffer = tr->array_buffer.buffer;
1014 	ring_buffer_nest_start(buffer);
1015 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1016 					    trace_ctx);
1017 	if (!event) {
1018 		size = 0;
1019 		goto out;
1020 	}
1021 
1022 	entry = ring_buffer_event_data(event);
1023 	entry->ip = ip;
1024 
1025 	memcpy(&entry->buf, str, size);
1026 
1027 	/* Add a newline if necessary */
1028 	if (entry->buf[size - 1] != '\n') {
1029 		entry->buf[size] = '\n';
1030 		entry->buf[size + 1] = '\0';
1031 	} else
1032 		entry->buf[size] = '\0';
1033 
1034 	__buffer_unlock_commit(buffer, event);
1035 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1036  out:
1037 	ring_buffer_nest_end(buffer);
1038 	return size;
1039 }
1040 EXPORT_SYMBOL_GPL(__trace_array_puts);
1041 
/**
 * __trace_puts - write a constant string into the trace buffer.
 * @ip:	   The address of the caller
 * @str:   The constant string to write
 * @size:  The size of the string.
 *
 * Writes into the global trace instance; returns the number of bytes
 * recorded (see __trace_array_puts()).
 */
int __trace_puts(unsigned long ip, const char *str, int size)
{
	return __trace_array_puts(&global_trace, ip, str, size);
}
EXPORT_SYMBOL_GPL(__trace_puts);
1053 
1054 /**
1055  * __trace_bputs - write the pointer to a constant string into trace buffer
1056  * @ip:	   The address of the caller
1057  * @str:   The constant string to write to the buffer to
1058  */
__trace_bputs(unsigned long ip,const char * str)1059 int __trace_bputs(unsigned long ip, const char *str)
1060 {
1061 	struct ring_buffer_event *event;
1062 	struct trace_buffer *buffer;
1063 	struct bputs_entry *entry;
1064 	unsigned int trace_ctx;
1065 	int size = sizeof(struct bputs_entry);
1066 	int ret = 0;
1067 
1068 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1069 		return 0;
1070 
1071 	if (unlikely(tracing_selftest_running || tracing_disabled))
1072 		return 0;
1073 
1074 	trace_ctx = tracing_gen_ctx();
1075 	buffer = global_trace.array_buffer.buffer;
1076 
1077 	ring_buffer_nest_start(buffer);
1078 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1079 					    trace_ctx);
1080 	if (!event)
1081 		goto out;
1082 
1083 	entry = ring_buffer_event_data(event);
1084 	entry->ip			= ip;
1085 	entry->str			= str;
1086 
1087 	__buffer_unlock_commit(buffer, event);
1088 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1089 
1090 	ret = 1;
1091  out:
1092 	ring_buffer_nest_end(buffer);
1093 	return ret;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_bputs);
1096 
1097 #ifdef CONFIG_TRACER_SNAPSHOT
/*
 * Take a snapshot of @tr if it is safe to do so: not in NMI context,
 * the snapshot buffer is allocated, and the current tracer does not
 * itself use the max_tr buffer.  @cond_data is passed through to
 * update_max_tr().
 */
static void tracing_snapshot_instance_cond(struct trace_array *tr,
					   void *cond_data)
{
	struct tracer *tracer = tr->current_trace;
	unsigned long flags;

	/* update_max_tr() takes tr->max_lock; taking it from NMI could deadlock */
	if (in_nmi()) {
		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
		return;
	}

	if (!tr->allocated_snapshot) {
		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
		trace_array_puts(tr, "*** stopping trace here!   ***\n");
		tracer_tracing_off(tr);
		return;
	}

	/* Note, snapshot can not be used when the tracer uses it */
	if (tracer->use_max_tr) {
		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
		return;
	}

	/* update_max_tr() requires irqs to be disabled */
	local_irq_save(flags);
	update_max_tr(tr, current, smp_processor_id(), cond_data);
	local_irq_restore(flags);
}
1128 
/* Take an unconditional snapshot of @tr (no conditional data). */
void tracing_snapshot_instance(struct trace_array *tr)
{
	tracing_snapshot_instance_cond(tr, NULL);
}
1133 
1134 /**
1135  * tracing_snapshot - take a snapshot of the current buffer.
1136  *
1137  * This causes a swap between the snapshot buffer and the current live
1138  * tracing buffer. You can use this to take snapshots of the live
1139  * trace when some condition is triggered, but continue to trace.
1140  *
1141  * Note, make sure to allocate the snapshot with either
1142  * a tracing_snapshot_alloc(), or by doing it manually
1143  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1144  *
1145  * If the snapshot buffer is not allocated, it will stop tracing.
1146  * Basically making a permanent snapshot.
1147  */
tracing_snapshot(void)1148 void tracing_snapshot(void)
1149 {
1150 	struct trace_array *tr = &global_trace;
1151 
1152 	tracing_snapshot_instance(tr);
1153 }
1154 EXPORT_SYMBOL_GPL(tracing_snapshot);
1155 
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:		The tracing instance to snapshot
 * @cond_data:	The data to be tested conditionally, and possibly saved
 *
 * This is the same as tracing_snapshot() except that the snapshot is
 * conditional - the snapshot will only happen if the
 * cond_snapshot.update() implementation receiving the cond_data
 * returns true, which means that the trace array's cond_snapshot
 * update() operation used the cond_data to determine whether the
 * snapshot should be taken, and if it was, presumably saved it along
 * with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1174 
/**
 * tracing_snapshot_cond_data - get the user data associated with a snapshot
 * @tr:		The tracing instance
 *
 * When the user enables a conditional snapshot using
 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
 * with the snapshot.  This accessor is used to retrieve it.
 *
 * Should not be called from cond_snapshot.update(), since it takes
 * the tr->max_lock lock, which the code calling
 * cond_snapshot.update() has already done.
 *
 * Returns the cond_data associated with the trace array's snapshot,
 * or NULL if no conditional snapshot is active.
 */
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	void *cond_data = NULL;

	/* max_lock is an arch spinlock; irqs must be off while it is held */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (tr->cond_snapshot)
		cond_data = tr->cond_snapshot->cond_data;

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return cond_data;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1205 
1206 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1207 					struct array_buffer *size_buf, int cpu_id);
1208 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1209 
tracing_alloc_snapshot_instance(struct trace_array * tr)1210 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1211 {
1212 	int ret;
1213 
1214 	if (!tr->allocated_snapshot) {
1215 
1216 		/* allocate spare buffer */
1217 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1218 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1219 		if (ret < 0)
1220 			return ret;
1221 
1222 		tr->allocated_snapshot = true;
1223 	}
1224 
1225 	return 0;
1226 }
1227 
/* Release the snapshot buffer's memory without destroying the buffer. */
static void free_snapshot(struct trace_array *tr)
{
	/*
	 * We don't free the ring buffer. Instead, we resize it because
	 * the max_tr ring buffer has some state (e.g. ring->clock) and
	 * we want to preserve it.
	 */
	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
	set_buffer_entries(&tr->max_buffer, 1);
	tracing_reset_online_cpus(&tr->max_buffer);
	tr->allocated_snapshot = false;
}
1240 
/**
 * tracing_alloc_snapshot - allocate snapshot buffer.
 *
 * This only allocates the snapshot buffer if it isn't already
 * allocated - it doesn't also take a snapshot.
 *
 * This is meant to be used in cases where the snapshot buffer needs
 * to be set up for events that can't sleep but need to be able to
 * trigger a snapshot.
 *
 * Returns 0 on success, negative errno otherwise.
 */
int tracing_alloc_snapshot(void)
{
	struct trace_array *tr = &global_trace;
	int ret;

	ret = tracing_alloc_snapshot_instance(tr);
	/* Allocation failure here is unexpected enough to warn about */
	WARN_ON(ret < 0);

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1262 
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * This is similar to tracing_snapshot(), but it will allocate the
 * snapshot buffer if it isn't already allocated. Use this only
 * where it is safe to sleep, as the allocation may sleep.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 */
void tracing_snapshot_alloc(void)
{
	if (tracing_alloc_snapshot() < 0)
		return;

	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1285 
/**
 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
 * @tr:		The tracing instance
 * @cond_data:	User data to associate with the snapshot
 * @update:	Implementation of the cond_snapshot update function
 *
 * Check whether the conditional snapshot for the given instance has
 * already been enabled, or if the current tracer is already using a
 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
 * save the cond_data and update function inside.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
				 cond_update_fn_t update)
{
	struct cond_snapshot *cond_snapshot;
	int ret = 0;

	/* Allocate before taking any locks; kzalloc may sleep */
	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
	if (!cond_snapshot)
		return -ENOMEM;

	cond_snapshot->cond_data = cond_data;
	cond_snapshot->update = update;

	mutex_lock(&trace_types_lock);

	ret = tracing_alloc_snapshot_instance(tr);
	if (ret)
		goto fail_unlock;

	/* Snapshot cannot coexist with a latency (max_tr-using) tracer */
	if (tr->current_trace->use_max_tr) {
		ret = -EBUSY;
		goto fail_unlock;
	}

	/*
	 * The cond_snapshot can only change to NULL without the
	 * trace_types_lock. We don't care if we race with it going
	 * to NULL, but we want to make sure that it's not set to
	 * something other than NULL when we get here, which we can
	 * do safely with only holding the trace_types_lock and not
	 * having to take the max_lock.
	 */
	if (tr->cond_snapshot) {
		ret = -EBUSY;
		goto fail_unlock;
	}

	/* Publish under max_lock so snapshot paths see a stable pointer */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	tr->cond_snapshot = cond_snapshot;
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	mutex_unlock(&trace_types_lock);

	return ret;

 fail_unlock:
	mutex_unlock(&trace_types_lock);
	kfree(cond_snapshot);
	return ret;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1352 
/**
 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
 * @tr:		The tracing instance
 *
 * Check whether the conditional snapshot for the given instance is
 * enabled; if so, free the cond_snapshot associated with it,
 * otherwise return -EINVAL.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	int ret = 0;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (!tr->cond_snapshot)
		ret = -EINVAL;
	else {
		/*
		 * NOTE(review): kfree() is called here with irqs off while
		 * holding the arch spinlock — confirm this is acceptable,
		 * or free after dropping the lock.
		 */
		kfree(tr->cond_snapshot);
		tr->cond_snapshot = NULL;
	}

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1383 #else
/* Stubs used when CONFIG_TRACER_SNAPSHOT is not set: warn and/or fail. */
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
int tracing_alloc_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
void tracing_snapshot_alloc(void)
{
	/* Give warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	return NULL;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
{
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	/*
	 * NOTE(review): returns false (0, i.e. "success") while the enable
	 * stub returns -ENODEV — looks inconsistent; confirm callers rely
	 * on this before changing it.
	 */
	return false;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1421 #define free_snapshot(tr)	do { } while (0)
1422 #endif /* CONFIG_TRACER_SNAPSHOT */
1423 
/**
 * tracer_tracing_off - disable recording on a trace array's ring buffer
 * @tr: the trace array to stop recording on
 */
void tracer_tracing_off(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_off(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 1;
	/* Make the flag seen by readers */
	smp_wmb();
}
1440 
/**
 * tracing_off - turn off tracing buffers
 *
 * This function stops the tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.  It operates on the global trace
 * instance.
 */
void tracing_off(void)
{
	tracer_tracing_off(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_off);
1454 
disable_trace_on_warning(void)1455 void disable_trace_on_warning(void)
1456 {
1457 	if (__disable_trace_on_warning) {
1458 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1459 			"Disabling tracing due to warning\n");
1460 		tracing_off();
1461 	}
1462 }
1463 
1464 /**
1465  * tracer_tracing_is_on - show real state of ring buffer enabled
1466  * @tr : the trace array to know if ring buffer is enabled
1467  *
1468  * Shows real state of the ring buffer if it is enabled or not.
1469  */
tracer_tracing_is_on(struct trace_array * tr)1470 bool tracer_tracing_is_on(struct trace_array *tr)
1471 {
1472 	if (tr->array_buffer.buffer)
1473 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1474 	return !tr->buffer_disabled;
1475 }
1476 
/**
 * tracing_is_on - show state of ring buffers enabled
 *
 * Returns non-zero if the global trace instance's ring buffer is
 * recording, zero otherwise.
 */
int tracing_is_on(void)
{
	return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);
1485 
/* Parse the trace_buf_size= boot option (memparse suffixes accepted). */
static int __init set_buf_size(char *str)
{
	unsigned long buf_size;

	if (!str)
		return 0;
	buf_size = memparse(str, &str);
	/*
	 * nr_entries can not be zero and the startup
	 * tests require some buffer space. Therefore
	 * ensure we have at least 4096 bytes of buffer.
	 */
	trace_buf_size = max(4096UL, buf_size);
	return 1;
}
__setup("trace_buf_size=", set_buf_size);
1502 
set_tracing_thresh(char * str)1503 static int __init set_tracing_thresh(char *str)
1504 {
1505 	unsigned long threshold;
1506 	int ret;
1507 
1508 	if (!str)
1509 		return 0;
1510 	ret = kstrtoul(str, 0, &threshold);
1511 	if (ret < 0)
1512 		return 0;
1513 	tracing_thresh = threshold * 1000;
1514 	return 1;
1515 }
1516 __setup("tracing_thresh=", set_tracing_thresh);
1517 
/* Convert nanoseconds to microseconds (truncating division). */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	return nsecs / 1000UL;
}
1522 
/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/* These must match the bit positions in trace_iterator_flags */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL	/* terminator */
};
1537 
/* Available trace clocks; indexed by trace_array::clock_id (see trace_clock_in_ns()). */
static struct {
	u64 (*func)(void);
	const char *name;
	int in_ns;		/* is this clock in nanoseconds? */
} trace_clocks[] = {
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
	{ trace_clock_jiffies,		"uptime",	0 },
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
	{ ktime_get_boot_fast_ns,	"boot",		1 },
	ARCH_TRACE_CLOCKS
};
1553 
trace_clock_in_ns(struct trace_array * tr)1554 bool trace_clock_in_ns(struct trace_array *tr)
1555 {
1556 	if (trace_clocks[tr->clock_id].in_ns)
1557 		return true;
1558 
1559 	return false;
1560 }
1561 
/*
 * trace_parser_get_init - gets the buffer for trace parser
 *
 * Returns 0 on success, 1 if the buffer allocation failed
 * (note: not a negative errno).
 */
int trace_parser_get_init(struct trace_parser *parser, int size)
{
	memset(parser, 0, sizeof(*parser));

	parser->buffer = kmalloc(size, GFP_KERNEL);
	if (!parser->buffer)
		return 1;

	parser->size = size;
	return 0;
}
1576 
/*
 * trace_parser_put - frees the buffer for trace parser
 */
void trace_parser_put(struct trace_parser *parser)
{
	kfree(parser->buffer);
	/* Clear the pointer so a second put is harmless */
	parser->buffer = NULL;
}
1585 
/*
 * trace_get_user - reads the user input string separated by  space
 * (matched by isspace(ch))
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns.
 *
 * Returns number of bytes read, or negative errno on fault or when a
 * token exceeds the parser buffer.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
	size_t cnt, loff_t *ppos)
{
	char ch;
	size_t read = 0;
	ssize_t ret;

	/* A read at offset 0 starts a fresh token */
	if (!*ppos)
		trace_parser_clear(parser);

	ret = get_user(ch, ubuf++);
	if (ret)
		goto out;

	read++;
	cnt--;

	/*
	 * The parser is not finished with the last write,
	 * continue reading the user input without skipping spaces.
	 */
	if (!parser->cont) {
		/* skip white space */
		while (cnt && isspace(ch)) {
			ret = get_user(ch, ubuf++);
			if (ret)
				goto out;
			read++;
			cnt--;
		}

		parser->idx = 0;

		/* only spaces were written */
		if (isspace(ch) || !ch) {
			*ppos += read;
			ret = read;
			goto out;
		}
	}

	/* read the non-space input */
	while (cnt && !isspace(ch) && ch) {
		if (parser->idx < parser->size - 1)
			parser->buffer[parser->idx++] = ch;
		else {
			/* token longer than the parser buffer */
			ret = -EINVAL;
			goto out;
		}
		ret = get_user(ch, ubuf++);
		if (ret)
			goto out;
		read++;
		cnt--;
	}

	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch) || !ch) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
	} else if (parser->idx < parser->size - 1) {
		/* ran out of user data mid-token; resume on the next call */
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
		/* Make sure the parsed string always terminates with '\0'. */
		parser->buffer[parser->idx] = 0;
	} else {
		ret = -EINVAL;
		goto out;
	}

	*ppos += read;
	ret = read;

out:
	return ret;
}
1673 
/* TODO add a seq_buf_to_buffer() */
/*
 * Copy up to @cnt unread bytes from @s into @buf and advance the read
 * position.  Returns the number of bytes copied, or -EBUSY when @s has
 * no unread data.
 */
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
{
	int len;

	if (trace_seq_used(s) <= s->seq.readpos)
		return -EBUSY;

	/* len is non-negative here, so the size_t/int comparison is safe */
	len = trace_seq_used(s) - s->seq.readpos;
	if (cnt > len)
		cnt = len;
	memcpy(buf, s->buffer + s->seq.readpos, cnt);

	s->seq.readpos += cnt;
	return cnt;
}
1690 
/* Threshold set via the tracing_thresh= boot option (see set_tracing_thresh()) */
unsigned long __read_mostly	tracing_thresh;
1692 
1693 #ifdef CONFIG_TRACER_MAX_TRACE
1694 static const struct file_operations tracing_max_lat_fops;
1695 
1696 #ifdef LATENCY_FS_NOTIFY
1697 
1698 static struct workqueue_struct *fsnotify_wq;
1699 
/* Workqueue handler: notify fsnotify watchers that tracing_max_latency changed. */
static void latency_fsnotify_workfn(struct work_struct *work)
{
	struct trace_array *tr = container_of(work, struct trace_array,
					      fsnotify_work);
	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
}
1706 
/* irq_work handler: defer the fsnotify call to process context via fsnotify_wq. */
static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
{
	struct trace_array *tr = container_of(iwork, struct trace_array,
					      fsnotify_irqwork);
	queue_work(fsnotify_wq, &tr->fsnotify_work);
}
1713 
/* Create the tracing_max_latency file and set up its change-notification hooks. */
static void trace_create_maxlat_file(struct trace_array *tr,
				     struct dentry *d_tracer)
{
	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
					      d_tracer, tr,
					      &tracing_max_lat_fops);
}
1723 
latency_fsnotify_init(void)1724 __init static int latency_fsnotify_init(void)
1725 {
1726 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1727 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1728 	if (!fsnotify_wq) {
1729 		pr_err("Unable to allocate tr_max_lat_wq\n");
1730 		return -ENOMEM;
1731 	}
1732 	return 0;
1733 }
1734 
1735 late_initcall_sync(latency_fsnotify_init);
1736 
/* Signal (asynchronously) that @tr's max latency value changed. */
void latency_fsnotify(struct trace_array *tr)
{
	/* Too early in boot; the workqueue does not exist yet */
	if (!fsnotify_wq)
		return;
	/*
	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
	 * possible that we are called from __schedule() or do_idle(), which
	 * could cause a deadlock.
	 */
	irq_work_queue(&tr->fsnotify_irqwork);
}
1748 
1749 #else /* !LATENCY_FS_NOTIFY */
1750 
1751 #define trace_create_maxlat_file(tr, d_tracer)				\
1752 	trace_create_file("tracing_max_latency", 0644,			\
1753 			  d_tracer, tr, &tracing_max_lat_fops)
1754 
1755 #endif
1756 
/*
 * Copy the new maximum trace into the separate maximum-trace
 * structure. (this way the maximum trace is permanently saved,
 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
 *
 * Caller must hold tr->max_lock (both callers in this file do).
 */
static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	struct array_buffer *trace_buf = &tr->array_buffer;
	struct array_buffer *max_buf = &tr->max_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);

	max_buf->cpu = cpu;
	max_buf->time_start = data->preempt_timestamp;

	max_data->saved_latency = tr->max_latency;
	max_data->critical_start = data->critical_start;
	max_data->critical_end = data->critical_end;

	/* NOTE(review): relies on tsk->comm being NUL-terminated within TASK_COMM_LEN */
	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
	max_data->pid = tsk->pid;
	/*
	 * If tsk == current, then use current_uid(), as that does not use
	 * RCU. The irq tracer can be called out of RCU scope.
	 */
	if (tsk == current)
		max_data->uid = current_uid();
	else
		max_data->uid = task_uid(tsk);

	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
	max_data->policy = tsk->policy;
	max_data->rt_priority = tsk->rt_priority;

	/* record this tasks comm */
	tracing_record_cmdline(tsk);
	/* Let userspace watchers know tracing_max_latency changed */
	latency_fsnotify(tr);
}
1796 
/**
 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
 * @tr: tracer
 * @tsk: the task with the latency
 * @cpu: The cpu that initiated the trace.
 * @cond_data: User data associated with a conditional snapshot
 *
 * Flip the buffers between the @tr and the max_tr and record information
 * about which task was the cause of this latency.
 *
 * Must be called with irqs disabled (asserted below).
 */
void
update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
	      void *cond_data)
{
	if (tr->stop_count)
		return;

	WARN_ON_ONCE(!irqs_disabled());

	if (!tr->allocated_snapshot) {
		/* Only the nop tracer should hit this when disabling */
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
		return;
	}

	arch_spin_lock(&tr->max_lock);

	/* Inherit the recordable setting from array_buffer */
	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
		ring_buffer_record_on(tr->max_buffer.buffer);
	else
		ring_buffer_record_off(tr->max_buffer.buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	/* A conditional snapshot may veto the swap via its update() hook */
	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
		arch_spin_unlock(&tr->max_lock);
		return;
	}
#endif
	/* Exchange the live buffer with the spare (max) buffer */
	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);

	__update_max_tr(tr, tsk, cpu);

	arch_spin_unlock(&tr->max_lock);

	/* Any waiters on the old snapshot buffer need to wake up */
	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
}
1845 
/**
 * update_max_tr_single - only copy one trace over, and reset the rest
 * @tr: tracer
 * @tsk: task with the latency
 * @cpu: the cpu of the buffer to copy.
 *
 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
 *
 * Must be called with irqs disabled (asserted below).
 */
void
update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	int ret;

	if (tr->stop_count)
		return;

	WARN_ON_ONCE(!irqs_disabled());
	if (!tr->allocated_snapshot) {
		/* Only the nop tracer should hit this when disabling */
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
		return;
	}

	arch_spin_lock(&tr->max_lock);

	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);

	if (ret == -EBUSY) {
		/*
		 * We failed to swap the buffer due to a commit taking
		 * place on this CPU. We fail to record, but we reset
		 * the max trace buffer (no one writes directly to it)
		 * and flag that it failed.
		 * Another reason is resize is in progress.
		 */
		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
			"Failed to swap buffers due to commit or resize in progress\n");
	}

	/* -EAGAIN and -EBUSY are expected transient failures; anything else warns */
	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);

	__update_max_tr(tr, tsk, cpu);
	arch_spin_unlock(&tr->max_lock);
}
1890 
1891 #endif /* CONFIG_TRACER_MAX_TRACE */
1892 
/*
 * Block until the ring buffer behind @iter has data (at least @full
 * pages' worth, per ring_buffer_wait() semantics).  Returns 0 without
 * waiting for static (snapshot) iterators.
 */
static int wait_on_pipe(struct trace_iterator *iter, int full)
{
	int ret;

	/* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
		return 0;

	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);

#ifdef CONFIG_TRACER_MAX_TRACE
	/*
	 * Make sure this is still the snapshot buffer, as if a snapshot were
	 * to happen, this would now be the main buffer.
	 */
	if (iter->snapshot)
		iter->array_buffer = &iter->tr->max_buffer;
#endif
	return ret;
}
1913 
1914 #ifdef CONFIG_FTRACE_STARTUP_TEST
1915 static bool selftests_can_run;
1916 
/* A tracer whose selftest was postponed until selftests can run. */
struct trace_selftests {
	struct list_head		list;	/* entry in postponed_selftests */
	struct tracer			*type;	/* tracer to test later */
};

static LIST_HEAD(postponed_selftests);
1923 
save_selftest(struct tracer * type)1924 static int save_selftest(struct tracer *type)
1925 {
1926 	struct trace_selftests *selftest;
1927 
1928 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1929 	if (!selftest)
1930 		return -ENOMEM;
1931 
1932 	selftest->type = type;
1933 	list_add(&selftest->list, &postponed_selftests);
1934 	return 0;
1935 }
1936 
/*
 * Run @type's startup selftest against the global trace instance.
 * Selftests that register too early in boot are postponed (see
 * save_selftest()).  Returns 0 on pass/skip, -1 on failure, or the
 * error from save_selftest().
 */
static int run_tracer_selftest(struct tracer *type)
{
	struct trace_array *tr = &global_trace;
	struct tracer *saved_tracer = tr->current_trace;
	int ret;

	if (!type->selftest || tracing_selftest_disabled)
		return 0;

	/*
	 * If a tracer registers early in boot up (before scheduling is
	 * initialized and such), then do not run its selftests yet.
	 * Instead, run it a little later in the boot process.
	 */
	if (!selftests_can_run)
		return save_selftest(type);

	if (!tracing_is_on()) {
		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
			type->name);
		return 0;
	}

	/*
	 * Run a selftest on this tracer.
	 * Here we reset the trace buffer, and set the current
	 * tracer to be this tracer. The tracer can then run some
	 * internal tracing to verify that everything is in order.
	 * If we fail, we do not register this tracer.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

	tr->current_trace = type;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (type->use_max_tr) {
		/* If we expanded the buffers, make sure the max is expanded too */
		if (ring_buffer_expanded)
			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
					   RING_BUFFER_ALL_CPUS);
		tr->allocated_snapshot = true;
	}
#endif

	/* the test is responsible for initializing and enabling */
	pr_info("Testing tracer %s: ", type->name);
	ret = type->selftest(type, tr);
	/* the test is responsible for resetting too */
	tr->current_trace = saved_tracer;
	if (ret) {
		printk(KERN_CONT "FAILED!\n");
		/* Add the warning after printing 'FAILED' */
		WARN_ON(1);
		return -1;
	}
	/* Only reset on passing, to avoid touching corrupted buffers */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (type->use_max_tr) {
		tr->allocated_snapshot = false;

		/* Shrink the max buffer again */
		if (ring_buffer_expanded)
			ring_buffer_resize(tr->max_buffer.buffer, 1,
					   RING_BUFFER_ALL_CPUS);
	}
#endif

	printk(KERN_CONT "PASSED\n");
	return 0;
}
2009 
/*
 * Late-boot initcall: run all selftests that were postponed because
 * their tracers registered before the system could run them. A tracer
 * that fails its postponed selftest is unlinked from trace_types so it
 * never appears in available_tracers.
 */
static __init int init_trace_selftests(void)
{
	struct trace_selftests *p, *n;
	struct tracer *t, **last;
	int ret;

	selftests_can_run = true;

	mutex_lock(&trace_types_lock);

	if (list_empty(&postponed_selftests))
		goto out;

	pr_info("Running postponed tracer tests:\n");

	tracing_selftest_running = true;
	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
		/* This loop can take minutes when sanitizers are enabled, so
		 * lets make sure we allow RCU processing.
		 */
		cond_resched();
		ret = run_tracer_selftest(p->type);
		/* If the test fails, then warn and remove from available_tracers */
		if (ret < 0) {
			WARN(1, "tracer: %s failed selftest, disabling\n",
			     p->type->name);
			/* Unlink the failing tracer from the singly linked list */
			last = &trace_types;
			for (t = trace_types; t; t = t->next) {
				if (t == p->type) {
					*last = t->next;
					break;
				}
				last = &t->next;
			}
		}
		list_del(&p->list);
		kfree(p);
	}
	tracing_selftest_running = false;

 out:
	mutex_unlock(&trace_types_lock);

	return 0;
}
core_initcall(init_trace_selftests);
#else
/* Selftests compiled out: registering a tracer always "passes". */
static inline int run_tracer_selftest(struct tracer *type)
{
	return 0;
}
#endif /* CONFIG_FTRACE_STARTUP_TEST */
2062 
/* Create the tracefs option files for tracer @t (defined later in file) */
static void add_tracer_options(struct trace_array *tr, struct tracer *t);

/* Apply "trace_options=" kernel command line settings (defined later) */
static void __init apply_trace_boot_options(void);
2066 
/**
 * register_tracer - register a tracer with the ftrace system.
 * @type: the plugin for the tracer
 *
 * Register a new plugin tracer. The tracer is linked into trace_types
 * after passing its startup selftest (if enabled). If this tracer was
 * requested on the kernel command line, it is also started here.
 *
 * Returns 0 on success, -1 on duplicate/invalid name or failed
 * selftest, -EPERM under lockdown, or -ENOMEM.
 */
int __init register_tracer(struct tracer *type)
{
	struct tracer *t;
	int ret = 0;

	if (!type->name) {
		pr_info("Tracer must have a name\n");
		return -1;
	}

	if (strlen(type->name) >= MAX_TRACER_SIZE) {
		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
		return -1;
	}

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not register tracer %s due to lockdown\n",
			   type->name);
		return -EPERM;
	}

	mutex_lock(&trace_types_lock);

	tracing_selftest_running = true;

	/* Reject duplicate tracer names */
	for (t = trace_types; t; t = t->next) {
		if (strcmp(type->name, t->name) == 0) {
			/* already found */
			pr_info("Tracer %s already registered\n",
				type->name);
			ret = -1;
			goto out;
		}
	}

	/* Fill in dummy flag handling so callers never see NULL */
	if (!type->set_flag)
		type->set_flag = &dummy_set_flag;
	if (!type->flags) {
		/*allocate a dummy tracer_flags*/
		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
		if (!type->flags) {
			ret = -ENOMEM;
			goto out;
		}
		type->flags->val = 0;
		type->flags->opts = dummy_tracer_opt;
	} else
		if (!type->flags->opts)
			type->flags->opts = dummy_tracer_opt;

	/* store the tracer for __set_tracer_option */
	type->flags->trace = type;

	ret = run_tracer_selftest(type);
	if (ret < 0)
		goto out;

	/* Link at the head of the singly linked tracer list */
	type->next = trace_types;
	trace_types = type;
	add_tracer_options(&global_trace, type);

 out:
	tracing_selftest_running = false;
	mutex_unlock(&trace_types_lock);

	if (ret || !default_bootup_tracer)
		goto out_unlock;

	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
		goto out_unlock;

	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
	/* Do we want this tracer to start on bootup? */
	tracing_set_tracer(&global_trace, type->name);
	default_bootup_tracer = NULL;

	apply_trace_boot_options();

	/* disable other selftests, since this will break it. */
	disable_tracing_selftest("running a tracer");

 out_unlock:
	return ret;
}
2157 
/*
 * Reset the ring buffer of a single CPU in @buf. Recording is disabled
 * and all in-flight commits are allowed to finish (via synchronize_rcu)
 * before the reset, so no writer races with it.
 */
static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
{
	struct trace_buffer *buffer = buf->buffer;

	if (!buffer)
		return;

	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_rcu();
	ring_buffer_reset_cpu(buffer, cpu);

	ring_buffer_record_enable(buffer);
}
2173 
/*
 * Reset the ring buffers of all online CPUs in @buf, quiescing writers
 * first, and restamp buf->time_start so subsequent output starts from
 * "now".
 */
void tracing_reset_online_cpus(struct array_buffer *buf)
{
	struct trace_buffer *buffer = buf->buffer;

	if (!buffer)
		return;

	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_rcu();

	buf->time_start = buffer_ftrace_now(buf, buf->cpu);

	ring_buffer_reset_online_cpus(buffer);

	ring_buffer_record_enable(buffer);
}
2192 
/* Must have trace_types_lock held */
/*
 * Reset every trace array whose clear_trace flag is set (both the main
 * buffer and, when configured, the snapshot buffer). The flag is
 * cleared before the reset so the work is done at most once.
 */
void tracing_reset_all_online_cpus_unlocked(void)
{
	struct trace_array *tr;

	lockdep_assert_held(&trace_types_lock);

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (!tr->clear_trace)
			continue;
		tr->clear_trace = false;
		tracing_reset_online_cpus(&tr->array_buffer);
#ifdef CONFIG_TRACER_MAX_TRACE
		tracing_reset_online_cpus(&tr->max_buffer);
#endif
	}
}
2210 
/* Locked wrapper: take trace_types_lock around the reset of all arrays */
void tracing_reset_all_online_cpus(void)
{
	mutex_lock(&trace_types_lock);
	tracing_reset_all_online_cpus_unlocked();
	mutex_unlock(&trace_types_lock);
}
2217 
/*
 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
 * is the tgid last observed corresponding to pid=i.
 */
static int *tgid_map;

/* The maximum valid index into tgid_map. */
static size_t tgid_map_max;

#define SAVED_CMDLINES_DEFAULT 128
#define NO_CMDLINE_MAP UINT_MAX
/*
 * Preemption must be disabled before acquiring trace_cmdline_lock.
 * The various trace_arrays' max_lock must be acquired in a context
 * where interrupt is disabled.
 */
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
struct saved_cmdlines_buffer {
	/* masked pid -> slot index, NO_CMDLINE_MAP when unused */
	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
	/* slot index -> pid that owns the slot */
	unsigned *map_cmdline_to_pid;
	/* number of comm slots in saved_cmdlines[] */
	unsigned cmdline_num;
	/* most recently used slot (wraps round-robin) */
	int cmdline_idx;
	/* slot storage: cmdline_num entries of TASK_COMM_LEN bytes */
	char saved_cmdlines[];
};
/* Current saved-cmdlines table; replaced when the user resizes it */
static struct saved_cmdlines_buffer *savedcmd;
2243 
get_saved_cmdlines(int idx)2244 static inline char *get_saved_cmdlines(int idx)
2245 {
2246 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2247 }
2248 
set_cmdline(int idx,const char * cmdline)2249 static inline void set_cmdline(int idx, const char *cmdline)
2250 {
2251 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2252 }
2253 
/*
 * Free a saved_cmdlines_buffer allocated by allocate_cmdlines_buffer().
 * The main structure came from alloc_pages() and was registered with
 * kmemleak manually, so both kmemleak_free() and free_pages() are
 * required; the pid map is a regular kmalloc'd array.
 */
static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
{
	int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);

	kfree(s->map_cmdline_to_pid);
	kmemleak_free(s);
	free_pages((unsigned long)s, order);
}
2262 
/*
 * Allocate a saved_cmdlines_buffer holding at least @val comm slots.
 * The slot count is rounded up to fill the whole page-order allocation.
 * Returns the buffer or NULL on allocation failure.
 */
static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
{
	struct saved_cmdlines_buffer *s;
	struct page *page;
	int orig_size, size;
	int order;

	/* Figure out how much is needed to hold the given number of cmdlines */
	orig_size = sizeof(*s) + val * TASK_COMM_LEN;
	order = get_order(orig_size);
	size = 1 << (order + PAGE_SHIFT);
	page = alloc_pages(GFP_KERNEL, order);
	if (!page)
		return NULL;

	s = page_address(page);
	/* alloc_pages() memory is invisible to kmemleak; register it by hand */
	kmemleak_alloc(s, size, 1, GFP_KERNEL);
	memset(s, 0, sizeof(*s));

	/* Round up to actual allocation */
	val = (size - sizeof(*s)) / TASK_COMM_LEN;
	s->cmdline_num = val;

	s->map_cmdline_to_pid = kmalloc_array(val,
					      sizeof(*s->map_cmdline_to_pid),
					      GFP_KERNEL);
	if (!s->map_cmdline_to_pid) {
		free_saved_cmdlines_buffer(s);
		return NULL;
	}

	s->cmdline_idx = 0;
	/*
	 * Byte-filling with NO_CMDLINE_MAP works because it is UINT_MAX,
	 * i.e. every byte is 0xff.
	 */
	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
	       sizeof(s->map_pid_to_cmdline));
	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
	       val * sizeof(*s->map_cmdline_to_pid));

	return s;
}
2302 
trace_create_savedcmd(void)2303 static int trace_create_savedcmd(void)
2304 {
2305 	savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2306 
2307 	return savedcmd ? 0 : -ENOMEM;
2308 }
2309 
/*
 * Return the stop depth of the global trace array: non-zero while
 * tracing_stop() calls outnumber tracing_start() calls.
 */
int is_tracing_stopped(void)
{
	return global_trace.stop_count;
}
2314 
/*
 * Decrement @tr's stop depth and, when it reaches zero, re-enable
 * recording on the main (and, when configured, the snapshot) ring
 * buffer. max_lock is taken to keep a snapshot swap from racing with
 * the enable.
 */
static void tracing_start_tr(struct trace_array *tr)
{
	struct trace_buffer *buffer;
	unsigned long flags;

	if (tracing_disabled)
		return;

	raw_spin_lock_irqsave(&tr->start_lock, flags);
	if (--tr->stop_count) {
		if (WARN_ON_ONCE(tr->stop_count < 0)) {
			/* Someone screwed up their debugging */
			tr->stop_count = 0;
		}
		goto out;
	}

	/* Prevent the buffers from switching */
	arch_spin_lock(&tr->max_lock);

	buffer = tr->array_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	buffer = tr->max_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);
#endif

	arch_spin_unlock(&tr->max_lock);

 out:
	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
}
2350 
2351 /**
2352  * tracing_start - quick start of the tracer
2353  *
2354  * If tracing is enabled but was stopped by tracing_stop,
2355  * this will start the tracer back up.
2356  */
tracing_start(void)2357 void tracing_start(void)
2358 
2359 {
2360 	return tracing_start_tr(&global_trace);
2361 }
2362 
/*
 * Increment @tr's stop depth and, on the first stop, disable recording
 * on the main (and, when configured, the snapshot) ring buffer.
 * max_lock keeps a snapshot swap from racing with the disable.
 */
static void tracing_stop_tr(struct trace_array *tr)
{
	struct trace_buffer *buffer;
	unsigned long flags;

	raw_spin_lock_irqsave(&tr->start_lock, flags);
	if (tr->stop_count++)
		goto out;

	/* Prevent the buffers from switching */
	arch_spin_lock(&tr->max_lock);

	buffer = tr->array_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	buffer = tr->max_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);
#endif

	arch_spin_unlock(&tr->max_lock);

 out:
	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
}
2390 
2391 /**
2392  * tracing_stop - quick stop of the tracer
2393  *
2394  * Light weight way to stop tracing. Use in conjunction with
2395  * tracing_start.
2396  */
tracing_stop(void)2397 void tracing_stop(void)
2398 {
2399 	return tracing_stop_tr(&global_trace);
2400 }
2401 
/*
 * Record @tsk's comm in the saved-cmdlines table, keyed by its pid
 * masked to PID_MAX_DEFAULT (so multiple pids can share a table entry).
 * Returns 1 on success (including for the idle task, which is never
 * stored) and 0 if the lock could not be taken.
 */
static int trace_save_cmdline(struct task_struct *tsk)
{
	unsigned tpid, idx;

	/* treat recording of idle task as a success */
	if (!tsk->pid)
		return 1;

	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);

	/*
	 * It's not the end of the world if we don't get
	 * the lock, but we also don't want to spin
	 * nor do we want to disable interrupts,
	 * so if we miss here, then better luck next time.
	 *
	 * This is called within the scheduler and wake up, so interrupts
	 * had better been disabled and run queue lock been held.
	 */
	lockdep_assert_preemption_disabled();
	if (!arch_spin_trylock(&trace_cmdline_lock))
		return 0;

	idx = savedcmd->map_pid_to_cmdline[tpid];
	if (idx == NO_CMDLINE_MAP) {
		/* No slot yet for this pid: claim the next one round-robin */
		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;

		savedcmd->map_pid_to_cmdline[tpid] = idx;
		savedcmd->cmdline_idx = idx;
	}

	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
	set_cmdline(idx, tsk->comm);

	arch_spin_unlock(&trace_cmdline_lock);

	return 1;
}
2440 
/*
 * Look up the saved comm for @pid into @comm (caller must hold
 * trace_cmdline_lock). Writes "<idle>" for pid 0, "<XXX>" for a
 * negative pid, and "<...>" when the pid's entry is missing or has
 * been overwritten by another pid sharing the masked slot.
 */
static void __trace_find_cmdline(int pid, char comm[])
{
	unsigned map;
	int tpid;

	if (!pid) {
		strcpy(comm, "<idle>");
		return;
	}

	if (WARN_ON_ONCE(pid < 0)) {
		strcpy(comm, "<XXX>");
		return;
	}

	tpid = pid & (PID_MAX_DEFAULT - 1);
	map = savedcmd->map_pid_to_cmdline[tpid];
	if (map != NO_CMDLINE_MAP) {
		/* The slot is only valid if it still belongs to this pid */
		tpid = savedcmd->map_cmdline_to_pid[map];
		if (tpid == pid) {
			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
			return;
		}
	}
	strcpy(comm, "<...>");
}
2467 
/*
 * Locked lookup of the saved comm for @pid. Preemption must be
 * disabled before taking trace_cmdline_lock (see the lock's comment),
 * hence the explicit preempt_disable() pairing.
 */
void trace_find_cmdline(int pid, char comm[])
{
	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	__trace_find_cmdline(pid, comm);

	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}
2478 
/*
 * Return a pointer to the tgid_map entry for @pid, or NULL if the map
 * is not allocated or @pid is out of range. (A negative @pid converts
 * to a huge unsigned value in the comparison against the size_t
 * tgid_map_max, so it also safely yields NULL.)
 */
static int *trace_find_tgid_ptr(int pid)
{
	/*
	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
	 * if we observe a non-NULL tgid_map then we also observe the correct
	 * tgid_map_max.
	 */
	int *map = smp_load_acquire(&tgid_map);

	if (unlikely(!map || pid > tgid_map_max))
		return NULL;

	return &map[pid];
}
2493 
/* Return the tgid last recorded for @pid, or 0 when none is recorded */
int trace_find_tgid(int pid)
{
	int *entry = trace_find_tgid_ptr(pid);

	if (!entry)
		return 0;

	return *entry;
}
2500 
trace_save_tgid(struct task_struct * tsk)2501 static int trace_save_tgid(struct task_struct *tsk)
2502 {
2503 	int *ptr;
2504 
2505 	/* treat recording of idle task as a success */
2506 	if (!tsk->pid)
2507 		return 1;
2508 
2509 	ptr = trace_find_tgid_ptr(tsk->pid);
2510 	if (!ptr)
2511 		return 0;
2512 
2513 	*ptr = tsk->tgid;
2514 	return 1;
2515 }
2516 
tracing_record_taskinfo_skip(int flags)2517 static bool tracing_record_taskinfo_skip(int flags)
2518 {
2519 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2520 		return true;
2521 	if (!__this_cpu_read(trace_taskinfo_save))
2522 		return true;
2523 	return false;
2524 }
2525 
/**
 * tracing_record_taskinfo - record the task info of a task
 *
 * @task:  task to record
 * @flags: TRACE_RECORD_CMDLINE for recording comm
 *         TRACE_RECORD_TGID for recording tgid
 */
void tracing_record_taskinfo(struct task_struct *task, int flags)
{
	bool done;

	if (tracing_record_taskinfo_skip(flags))
		return;

	/*
	 * Record as much task information as possible. If some fail, continue
	 * to try to record the others.
	 */
	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);

	/* If recording any information failed, retry again soon. */
	if (!done)
		return;

	/* Everything recorded: no need to save again until re-armed */
	__this_cpu_write(trace_taskinfo_save, false);
}
2553 
/**
 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
 *
 * @prev: previous task during sched_switch
 * @next: next task during sched_switch
 * @flags: TRACE_RECORD_CMDLINE for recording comm
 *         TRACE_RECORD_TGID for recording tgid
 */
void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
					  struct task_struct *next, int flags)
{
	bool done;

	if (tracing_record_taskinfo_skip(flags))
		return;

	/*
	 * Record as much task information as possible. If some fail, continue
	 * to try to record the others.
	 */
	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);

	/* If recording any information failed, retry again soon. */
	if (!done)
		return;

	/* All four records succeeded: disarm until the next request */
	__this_cpu_write(trace_taskinfo_save, false);
}
2585 
/* Helpers to record a specific task information */
/* Record only the comm of @task */
void tracing_record_cmdline(struct task_struct *task)
{
	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
}
2591 
/* Record only the tgid of @task */
void tracing_record_tgid(struct task_struct *task)
{
	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
}
2596 
2597 /*
2598  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2599  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2600  * simplifies those functions and keeps them in sync.
2601  */
trace_handle_return(struct trace_seq * s)2602 enum print_line_t trace_handle_return(struct trace_seq *s)
2603 {
2604 	return trace_seq_has_overflowed(s) ?
2605 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2606 }
2607 EXPORT_SYMBOL_GPL(trace_handle_return);
2608 
/*
 * Current task's migration-disable depth, or 0 on !SMP kernels where
 * the field does not exist.
 */
static unsigned short migration_disable_value(void)
{
#if defined(CONFIG_SMP)
	return current->migration_disabled;
#else
	return 0;
#endif
}
2617 
/*
 * Build the packed trace context word from @irqs_status and the current
 * preempt count: TRACE_FLAG_* bits in the upper 16 bits, the preempt
 * count (clamped to a nibble) in bits 0-3, and the migration-disable
 * depth (also clamped) in bits 4-7.
 */
unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
{
	unsigned int trace_flags = irqs_status;
	unsigned int pc;

	pc = preempt_count();

	if (pc & NMI_MASK)
		trace_flags |= TRACE_FLAG_NMI;
	if (pc & HARDIRQ_MASK)
		trace_flags |= TRACE_FLAG_HARDIRQ;
	if (in_serving_softirq())
		trace_flags |= TRACE_FLAG_SOFTIRQ;

	if (tif_need_resched())
		trace_flags |= TRACE_FLAG_NEED_RESCHED;
	if (test_preempt_need_resched())
		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
}
2639 
/* Public wrapper around __trace_buffer_lock_reserve() */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned int trace_ctx)
{
	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
}
2648 
/* Per-CPU temp event page used while filters are active */
DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
/* Per-CPU nesting count: non-zero means the temp event is in use */
DEFINE_PER_CPU(int, trace_buffered_event_cnt);
/* Number of users of the buffered-event machinery (under event_mutex) */
static int trace_buffered_event_ref;
2652 
/**
 * trace_buffered_event_enable - enable buffering events
 *
 * When events are being filtered, it is quicker to use a temporary
 * buffer to write the event data into if there's a likely chance
 * that it will not be committed. The discard of the ring buffer
 * is not as fast as committing, and is much slower than copying
 * a commit.
 *
 * When an event is to be filtered, allocate per cpu buffers to
 * write the event data into, and if the event is filtered and discarded
 * it is simply dropped, otherwise, the entire data is to be committed
 * in one shot.
 */
void trace_buffered_event_enable(void)
{
	struct ring_buffer_event *event;
	struct page *page;
	int cpu;

	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));

	/* Refcounted: only the first user allocates the pages */
	if (trace_buffered_event_ref++)
		return;

	for_each_tracing_cpu(cpu) {
		page = alloc_pages_node(cpu_to_node(cpu),
					GFP_KERNEL | __GFP_NORETRY, 0);
		/* This is just an optimization and can handle failures */
		if (!page) {
			pr_err("Failed to allocate event buffer\n");
			break;
		}

		event = page_address(page);
		memset(event, 0, sizeof(*event));

		per_cpu(trace_buffered_event, cpu) = event;

		/* Sanity check: the per-cpu view must match what we stored */
		preempt_disable();
		if (cpu == smp_processor_id() &&
		    __this_cpu_read(trace_buffered_event) !=
		    per_cpu(trace_buffered_event, cpu))
			WARN_ON_ONCE(1);
		preempt_enable();
	}
}
2700 
/* IPI callback: release this CPU's temp event (decrement use count) */
static void enable_trace_buffered_event(void *data)
{
	/* Probably not needed, but do it anyway */
	smp_rmb();
	this_cpu_dec(trace_buffered_event_cnt);
}
2707 
/* IPI callback: mark this CPU's temp event busy so writers skip it */
static void disable_trace_buffered_event(void *data)
{
	this_cpu_inc(trace_buffered_event_cnt);
}
2712 
/**
 * trace_buffered_event_disable - disable buffering events
 *
 * When a filter is removed, it is faster to not use the buffered
 * events, and to commit directly into the ring buffer. Free up
 * the temp buffers when there are no more users. This requires
 * special synchronization with current events.
 */
void trace_buffered_event_disable(void)
{
	int cpu;

	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));

	if (WARN_ON_ONCE(!trace_buffered_event_ref))
		return;

	/* Refcounted: only the last user tears the buffers down */
	if (--trace_buffered_event_ref)
		return;

	/* For each CPU, set the buffer as used. */
	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
			 NULL, true);

	/* Wait for all current users to finish */
	synchronize_rcu();

	for_each_tracing_cpu(cpu) {
		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
		per_cpu(trace_buffered_event, cpu) = NULL;
	}

	/*
	 * Wait for all CPUs that potentially started checking if they can use
	 * their event buffer only after the previous synchronize_rcu() call and
	 * they still read a valid pointer from trace_buffered_event. It must be
	 * ensured they don't see cleared trace_buffered_event_cnt else they
	 * could wrongly decide to use the pointed-to buffer which is now freed.
	 */
	synchronize_rcu();

	/* For each CPU, relinquish the buffer */
	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
			 true);
}
2758 
2759 static struct trace_buffer *temp_buffer;
2760 
/*
 * Reserve space for a trace event. When filtering is active and the
 * per-CPU temp event is free, the event is staged there instead of the
 * ring buffer so a filter miss can drop it cheaply; otherwise space is
 * reserved directly in the ring buffer (falling back to temp_buffer for
 * trigger evaluation when the ring buffer refuses the reserve).
 * *current_rb is updated to whichever buffer the event landed in.
 */
struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
			  struct trace_event_file *trace_file,
			  int type, unsigned long len,
			  unsigned int trace_ctx)
{
	struct ring_buffer_event *entry;
	struct trace_array *tr = trace_file->tr;
	int val;

	*current_rb = tr->array_buffer.buffer;

	if (!tr->no_filter_buffering_ref &&
	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
	    (entry = this_cpu_read(trace_buffered_event))) {
		/*
		 * Filtering is on, so try to use the per cpu buffer first.
		 * This buffer will simulate a ring_buffer_event,
		 * where the type_len is zero and the array[0] will
		 * hold the full length.
		 * (see include/linux/ring-buffer.h for details on
		 *  how the ring_buffer_event is structured).
		 *
		 * Using a temp buffer during filtering and copying it
		 * on a matched filter is quicker than writing directly
		 * into the ring buffer and then discarding it when
		 * it doesn't match. That is because the discard
		 * requires several atomic operations to get right.
		 * Copying on match and doing nothing on a failed match
		 * is still quicker than no copy on match, but having
		 * to discard out of the ring buffer on a failed match.
		 */
		int max_len = PAGE_SIZE - struct_size(entry, array, 1);

		val = this_cpu_inc_return(trace_buffered_event_cnt);

		/*
		 * Preemption is disabled, but interrupts and NMIs
		 * can still come in now. If that happens after
		 * the above increment, then it will have to go
		 * back to the old method of allocating the event
		 * on the ring buffer, and if the filter fails, it
		 * will have to call ring_buffer_discard_commit()
		 * to remove it.
		 *
		 * Need to also check the unlikely case that the
		 * length is bigger than the temp buffer size.
		 * If that happens, then the reserve is pretty much
		 * guaranteed to fail, as the ring buffer currently
		 * only allows events less than a page. But that may
		 * change in the future, so let the ring buffer reserve
		 * handle the failure in that case.
		 */
		if (val == 1 && likely(len <= max_len)) {
			trace_event_setup(entry, type, trace_ctx);
			entry->array[0] = len;
			return entry;
		}
		/* Temp event already in use (nested context): back off */
		this_cpu_dec(trace_buffered_event_cnt);
	}

	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
					    trace_ctx);
	/*
	 * If tracing is off, but we have triggers enabled
	 * we still need to look at the event data. Use the temp_buffer
	 * to store the trace event for the trigger to use. It's recursive
	 * safe and will not be recorded anywhere.
	 */
	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
		*current_rb = temp_buffer;
		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
						    trace_ctx);
	}
	return entry;
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2838 
/* Serializes use of the shared tracepoint_print_iter in output_printk() */
static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
/* Serializes updates of the tracepoint_printk sysctl state */
static DEFINE_MUTEX(tracepoint_printk_mutex);
2841 
/*
 * Print an event straight to printk using the shared
 * tracepoint_print_iter. Soft-disabled or filtered-out events are
 * skipped. tracepoint_iter_lock guards the single shared iterator.
 */
static void output_printk(struct trace_event_buffer *fbuffer)
{
	struct trace_event_call *event_call;
	struct trace_event_file *file;
	struct trace_event *event;
	unsigned long flags;
	struct trace_iterator *iter = tracepoint_print_iter;

	/* We should never get here if iter is NULL */
	if (WARN_ON_ONCE(!iter))
		return;

	event_call = fbuffer->trace_file->event_call;
	if (!event_call || !event_call->event.funcs ||
	    !event_call->event.funcs->trace)
		return;

	file = fbuffer->trace_file;
	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
	     !filter_match_preds(file->filter, fbuffer->entry)))
		return;

	event = &fbuffer->trace_file->event_call->event;

	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
	trace_seq_init(&iter->seq);
	iter->ent = fbuffer->entry;
	event_call->event.funcs->trace(iter, 0, event);
	/* NUL-terminate the seq buffer before handing it to printk */
	trace_seq_putc(&iter->seq, 0);
	printk("%s", iter->seq.buffer);

	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
}
2876 
/*
 * sysctl handler for kernel.tracepoint_printk: toggles the static key
 * that routes events through output_printk(). Forced off when the
 * shared print iterator was never allocated.
 */
int tracepoint_printk_sysctl(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp,
			     loff_t *ppos)
{
	int save_tracepoint_printk;
	int ret;

	mutex_lock(&tracepoint_printk_mutex);
	save_tracepoint_printk = tracepoint_printk;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	/*
	 * This will force exiting early, as tracepoint_printk
	 * is always zero when tracepoint_printk_iter is not allocated
	 */
	if (!tracepoint_print_iter)
		tracepoint_printk = 0;

	/* Only flip the static key when the value actually changed */
	if (save_tracepoint_printk == tracepoint_printk)
		goto out;

	if (tracepoint_printk)
		static_key_enable(&tracepoint_printk_key.key);
	else
		static_key_disable(&tracepoint_printk_key.key);

 out:
	mutex_unlock(&tracepoint_printk_mutex);

	return ret;
}
2909 
/*
 * Commit a previously reserved event: run triggers (possibly discarding
 * the event), optionally mirror it to printk and exporters, then commit
 * it with the stack/userstack dumps. Post-call triggers run even when
 * the event itself was discarded.
 */
void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
{
	enum event_trigger_type tt = ETT_NONE;
	struct trace_event_file *file = fbuffer->trace_file;

	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
			fbuffer->entry, &tt))
		goto discard;

	if (static_key_false(&tracepoint_printk_key.key))
		output_printk(fbuffer);

	if (static_branch_unlikely(&trace_event_exports_enabled))
		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);

	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);

discard:
	if (tt)
		event_triggers_post_call(file, tt);

}
EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2934 
/*
 * Skip 3:
 *
 *   trace_buffer_unlock_commit_regs()
 *   trace_event_buffer_commit()
 *   trace_event_raw_event_xxx()
 */
# define STACK_SKIP 3

/*
 * Commit @event and append kernel-stack and user-stack trace events
 * when those options are enabled for @tr.
 */
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
				     struct trace_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned int trace_ctx,
				     struct pt_regs *regs)
{
	__buffer_unlock_commit(buffer, event);

	/*
	 * If regs is not set, then skip the necessary functions.
	 * Note, we can still get here via blktrace, wakeup tracer
	 * and mmiotrace, but that's ok if they lose a function or
	 * two. They are not that meaningful.
	 */
	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
	ftrace_trace_userstack(tr, buffer, trace_ctx);
}
2961 
/*
 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
 * Just commits @event to the ring buffer.
 */
void
trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
				   struct ring_buffer_event *event)
{
	__buffer_unlock_commit(buffer, event);
}
2971 
/*
 * Record a function-entry (TRACE_FN) event into @tr's buffer.
 * @ip is the traced function's address, @parent_ip its caller's.
 * The event may be dropped by event filters; otherwise it is passed
 * to export callbacks (if enabled) and committed.
 */
void
trace_function(struct trace_array *tr, unsigned long ip, unsigned long
	       parent_ip, unsigned int trace_ctx)
{
	struct trace_event_call *call = &event_function;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct ring_buffer_event *event;
	struct ftrace_entry *entry;

	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
					    trace_ctx);
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
	entry->ip			= ip;
	entry->parent_ip		= parent_ip;

	if (!call_filter_check_discard(call, entry, buffer, event)) {
		if (static_branch_unlikely(&trace_function_exports_enabled))
			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
		__buffer_unlock_commit(buffer, event);
	}
}
2995 
2996 #ifdef CONFIG_STACKTRACE
2997 
2998 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2999 #define FTRACE_KSTACK_NESTING	4
3000 
3001 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
3002 
3003 struct ftrace_stack {
3004 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
3005 };
3006 
3007 
3008 struct ftrace_stacks {
3009 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3010 };
3011 
3012 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3013 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3014 
/*
 * Record a kernel stack trace (TRACE_STACK) into @buffer. A per-CPU
 * array of FTRACE_KSTACK_NESTING save areas lets nested contexts
 * (normal, softirq, irq, NMI) each use their own scratch space; the
 * ftrace_stack_reserve counter picks the slot for this nesting level.
 */
static void __ftrace_trace_stack(struct trace_buffer *buffer,
				 unsigned int trace_ctx,
				 int skip, struct pt_regs *regs)
{
	struct trace_event_call *call = &event_kernel_stack;
	struct ring_buffer_event *event;
	unsigned int size, nr_entries;
	struct ftrace_stack *fstack;
	struct stack_entry *entry;
	int stackidx;

	/*
	 * Add one, for this function and the call to save_stack_trace()
	 * If regs is set, then these functions will not be in the way.
	 */
#ifndef CONFIG_UNWINDER_ORC
	if (!regs)
		skip++;
#endif

	preempt_disable_notrace();

	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;

	/* This should never happen. If it does, yell once and skip */
	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
		goto out;

	/*
	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
	 * interrupt will either see the value pre increment or post
	 * increment. If the interrupt happens pre increment it will have
	 * restored the counter when it returns.  We just need a barrier to
	 * keep gcc from moving things around.
	 */
	barrier();

	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
	size = ARRAY_SIZE(fstack->calls);

	if (regs) {
		nr_entries = stack_trace_save_regs(regs, fstack->calls,
						   size, skip);
	} else {
		nr_entries = stack_trace_save(fstack->calls, size, skip);
	}

	/* Only reserve room for the caller slots actually saved */
	size = nr_entries * sizeof(unsigned long);
	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
				    (sizeof(*entry) - sizeof(entry->caller)) + size,
				    trace_ctx);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);

	memcpy(&entry->caller, fstack->calls, size);
	entry->size = nr_entries;

	if (!call_filter_check_discard(call, entry, buffer, event))
		__buffer_unlock_commit(buffer, event);

 out:
	/* Again, don't let gcc optimize things here */
	barrier();
	__this_cpu_dec(ftrace_stack_reserve);
	preempt_enable_notrace();

}
3083 
ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)3084 static inline void ftrace_trace_stack(struct trace_array *tr,
3085 				      struct trace_buffer *buffer,
3086 				      unsigned int trace_ctx,
3087 				      int skip, struct pt_regs *regs)
3088 {
3089 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3090 		return;
3091 
3092 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3093 }
3094 
/*
 * Record a kernel stack trace into @tr's buffer, but only call into
 * the stack-saving machinery when RCU is watching, or when it can be
 * temporarily enabled from a non-NMI context.
 */
void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
		   int skip)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;

	if (rcu_is_watching()) {
		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
		return;
	}

	/*
	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
	 * but if the above rcu_is_watching() failed, then the NMI
	 * triggered someplace critical, and rcu_irq_enter() should
	 * not be called from NMI.
	 */
	if (unlikely(in_nmi()))
		return;

	rcu_irq_enter_irqson();
	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
	rcu_irq_exit_irqson();
}
3118 
/**
 * trace_dump_stack - record a stack back trace in the trace buffer
 * @skip: Number of functions to skip (helper handlers)
 *
 * Writes into the global (top-level) trace buffer. Does nothing while
 * tracing is disabled or a selftest is running.
 */
void trace_dump_stack(int skip)
{
	if (tracing_disabled || tracing_selftest_running)
		return;

#ifndef CONFIG_UNWINDER_ORC
	/* Skip 1 to skip this function. */
	skip++;
#endif
	__ftrace_trace_stack(global_trace.array_buffer.buffer,
			     tracing_gen_ctx(), skip, NULL);
}
EXPORT_SYMBOL_GPL(trace_dump_stack);
3136 
3137 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3138 static DEFINE_PER_CPU(int, user_stack_count);
3139 
/*
 * Record the current task's user-space stack (TRACE_USER_STACK) into
 * @buffer, if the 'userstacktrace' option is set on @tr. A per-CPU
 * counter guards against recursion, since saving a user stack can
 * fault and thereby trigger further tracing.
 */
static void
ftrace_trace_userstack(struct trace_array *tr,
		       struct trace_buffer *buffer, unsigned int trace_ctx)
{
	struct trace_event_call *call = &event_user_stack;
	struct ring_buffer_event *event;
	struct userstack_entry *entry;

	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
		return;

	/*
	 * NMIs can not handle page faults, even with fix ups.
	 * The save user stack can (and often does) fault.
	 */
	if (unlikely(in_nmi()))
		return;

	/*
	 * prevent recursion, since the user stack tracing may
	 * trigger other kernel events.
	 */
	preempt_disable();
	if (__this_cpu_read(user_stack_count))
		goto out;

	__this_cpu_inc(user_stack_count);

	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
					    sizeof(*entry), trace_ctx);
	if (!event)
		goto out_drop_count;
	entry	= ring_buffer_event_data(event);

	entry->tgid		= current->tgid;
	/* Zero first: stack_trace_save_user() may fill fewer entries */
	memset(&entry->caller, 0, sizeof(entry->caller));

	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
	if (!call_filter_check_discard(call, entry, buffer, event))
		__buffer_unlock_commit(buffer, event);

 out_drop_count:
	__this_cpu_dec(user_stack_count);
 out:
	preempt_enable();
}
3186 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* No-op stub when user stack tracing is not supported */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
3192 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3193 
3194 #endif /* CONFIG_STACKTRACE */
3195 
3196 static inline void
func_repeats_set_delta_ts(struct func_repeats_entry * entry,unsigned long long delta)3197 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3198 			  unsigned long long delta)
3199 {
3200 	entry->bottom_delta_ts = delta & U32_MAX;
3201 	entry->top_delta_ts = (delta >> 32);
3202 }
3203 
/*
 * Emit a TRACE_FUNC_REPEATS event summarizing the repeated function
 * call described by @last_info: its ip, parent_ip, repeat count, and
 * the time elapsed since the last occurrence.
 */
void trace_last_func_repeats(struct trace_array *tr,
			     struct trace_func_repeats *last_info,
			     unsigned int trace_ctx)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct func_repeats_entry *entry;
	struct ring_buffer_event *event;
	u64 delta;

	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
					    sizeof(*entry), trace_ctx);
	if (!event)
		return;

	/* Time between the last repeated call and this event's timestamp */
	delta = ring_buffer_event_time_stamp(buffer, event) -
		last_info->ts_last_call;

	entry = ring_buffer_event_data(event);
	entry->ip = last_info->ip;
	entry->parent_ip = last_info->parent_ip;
	entry->count = last_info->count;
	func_repeats_set_delta_ts(entry, delta);

	__buffer_unlock_commit(buffer, event);
}
3229 
/* created for use with alloc_percpu */
struct trace_buffer_struct {
	int nesting;	/* depth of active get_trace_buf() users on this CPU */
	char buffer[4][TRACE_BUF_SIZE];	/* one scratch buffer per nesting level */
};

/* Per-CPU trace_printk() scratch buffers; NULL until allocated */
static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3237 
/*
 * This allows for lockless recording.  If we're nested too deeply, then
 * this returns NULL.
 */
static char *get_trace_buf(void)
{
	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);

	/* 4 matches the number of per-level buffers in trace_buffer_struct */
	if (!trace_percpu_buffer || buffer->nesting >= 4)
		return NULL;

	buffer->nesting++;

	/* Interrupts must see nesting incremented before we use the buffer */
	barrier();
	return &buffer->buffer[buffer->nesting - 1][0];
}
3255 
/* Release the per-CPU buffer obtained from get_trace_buf() */
static void put_trace_buf(void)
{
	/* Don't let the decrement of nesting leak before this */
	barrier();
	this_cpu_dec(trace_percpu_buffer->nesting);
}
3262 
/*
 * Allocate the per-CPU trace_printk() buffers if not already present.
 * Returns 0 on success (or when already allocated), -ENOMEM on failure.
 */
static int alloc_percpu_trace_buffer(void)
{
	struct trace_buffer_struct __percpu *buffers;

	if (trace_percpu_buffer)
		return 0;

	buffers = alloc_percpu(struct trace_buffer_struct);
	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
		return -ENOMEM;

	trace_percpu_buffer = buffers;
	return 0;
}
3277 
3278 static int buffers_allocated;
3279 
/*
 * Set up everything trace_printk() needs: per-CPU scratch buffers,
 * expanded ring buffers, and cmdline recording. Prints a loud banner
 * because trace_printk() is a debug-only facility.
 */
void trace_printk_init_buffers(void)
{
	if (buffers_allocated)
		return;

	if (alloc_percpu_trace_buffer())
		return;

	/* trace_printk() is for debug use only. Don't use it in production. */

	pr_warn("\n");
	pr_warn("**********************************************************\n");
	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
	pr_warn("**                                                      **\n");
	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
	pr_warn("**                                                      **\n");
	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
	pr_warn("** unsafe for production use.                           **\n");
	pr_warn("**                                                      **\n");
	pr_warn("** If you see this message and you are not debugging    **\n");
	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
	pr_warn("**                                                      **\n");
	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
	pr_warn("**********************************************************\n");

	/* Expand the buffers to set size */
	tracing_update_buffers();

	buffers_allocated = 1;

	/*
	 * trace_printk_init_buffers() can be called by modules.
	 * If that happens, then we need to start cmdline recording
	 * directly here. If the global_trace.buffer is already
	 * allocated here, then this was called by module code.
	 */
	if (global_trace.array_buffer.buffer)
		tracing_start_cmdline_record();
}
EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3320 
trace_printk_start_comm(void)3321 void trace_printk_start_comm(void)
3322 {
3323 	/* Start tracing comms if trace printk is set */
3324 	if (!buffers_allocated)
3325 		return;
3326 	tracing_start_cmdline_record();
3327 }
3328 
trace_printk_start_stop_comm(int enabled)3329 static void trace_printk_start_stop_comm(int enabled)
3330 {
3331 	if (!buffers_allocated)
3332 		return;
3333 
3334 	if (enabled)
3335 		tracing_start_cmdline_record();
3336 	else
3337 		tracing_stop_cmdline_record();
3338 }
3339 
/**
 * trace_vbprintk - write binary msg to tracing buffer
 * @ip:    The address of the caller
 * @fmt:   The string format to write to the buffer
 * @args:  Arguments for @fmt
 *
 * Stores @fmt as a pointer plus the binary-encoded arguments (via
 * vbin_printf()), deferring string formatting to read time.
 * Returns the number of binary words written, or 0 if nothing was
 * recorded.
 */
int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
{
	struct trace_event_call *call = &event_bprint;
	struct ring_buffer_event *event;
	struct trace_buffer *buffer;
	struct trace_array *tr = &global_trace;
	struct bprint_entry *entry;
	unsigned int trace_ctx;
	char *tbuffer;
	int len = 0, size;

	if (unlikely(tracing_selftest_running || tracing_disabled))
		return 0;

	/* Don't pollute graph traces with trace_vprintk internals */
	pause_graph_tracing();

	trace_ctx = tracing_gen_ctx();
	preempt_disable_notrace();

	tbuffer = get_trace_buf();
	if (!tbuffer) {
		len = 0;
		goto out_nobuffer;
	}

	/* Encode the varargs in binary form into the scratch buffer */
	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);

	/* Bail if the encoded args overflowed the scratch buffer */
	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
		goto out_put;

	size = sizeof(*entry) + sizeof(u32) * len;
	buffer = tr->array_buffer.buffer;
	ring_buffer_nest_start(buffer);
	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
					    trace_ctx);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);
	entry->ip			= ip;
	entry->fmt			= fmt;

	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
	if (!call_filter_check_discard(call, entry, buffer, event)) {
		__buffer_unlock_commit(buffer, event);
		/* 6: skip the trace_printk internal frames from the stack dump */
		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
	}

out:
	ring_buffer_nest_end(buffer);
out_put:
	put_trace_buf();

out_nobuffer:
	preempt_enable_notrace();
	unpause_graph_tracing();

	return len;
}
EXPORT_SYMBOL_GPL(trace_vbprintk);
3406 
/*
 * Format @fmt/@args into a TRACE_PRINT event and commit it to @buffer.
 * Returns the length of the formatted string, or 0 if nothing was
 * recorded. Shared backend for trace_array_vprintk() and
 * trace_array_printk_buf().
 */
__printf(3, 0)
static int
__trace_array_vprintk(struct trace_buffer *buffer,
		      unsigned long ip, const char *fmt, va_list args)
{
	struct trace_event_call *call = &event_print;
	struct ring_buffer_event *event;
	int len = 0, size;
	struct print_entry *entry;
	unsigned int trace_ctx;
	char *tbuffer;

	if (tracing_disabled || tracing_selftest_running)
		return 0;

	/* Don't pollute graph traces with trace_vprintk internals */
	pause_graph_tracing();

	trace_ctx = tracing_gen_ctx();
	preempt_disable_notrace();


	tbuffer = get_trace_buf();
	if (!tbuffer) {
		len = 0;
		goto out_nobuffer;
	}

	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);

	/* +1 for the terminating NUL copied below */
	size = sizeof(*entry) + len + 1;
	ring_buffer_nest_start(buffer);
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
					    trace_ctx);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);
	entry->ip = ip;

	memcpy(&entry->buf, tbuffer, len + 1);
	if (!call_filter_check_discard(call, entry, buffer, event)) {
		__buffer_unlock_commit(buffer, event);
		/* 6: skip the trace_printk internal frames from the stack dump */
		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
	}

out:
	ring_buffer_nest_end(buffer);
	put_trace_buf();

out_nobuffer:
	preempt_enable_notrace();
	unpause_graph_tracing();

	return len;
}
3462 
/* Write a vprintf-style message into @tr's trace buffer */
__printf(3, 0)
int trace_array_vprintk(struct trace_array *tr,
			unsigned long ip, const char *fmt, va_list args)
{
	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
}
3469 
/**
 * trace_array_printk - Print a message to a specific instance
 * @tr: The instance trace_array descriptor
 * @ip: The instruction pointer that this is called from.
 * @fmt: The format to print (printf format)
 *
 * If a subsystem sets up its own instance, they have the right to
 * printk strings into their tracing instance buffer using this
 * function. Note, this function will not write into the top level
 * buffer (use trace_printk() for that), as writing into the top level
 * buffer should only have events that can be individually disabled.
 * trace_printk() is only used for debugging a kernel, and should not
 * be ever incorporated in normal use.
 *
 * trace_array_printk() can be used, as it will not add noise to the
 * top level tracing buffer.
 *
 * Note, trace_array_init_printk() must be called on @tr before this
 * can be used.
 *
 * Returns the number of characters written, 0 if suppressed, or
 * -ENOENT when @tr is NULL.
 */
__printf(3, 0)
int trace_array_printk(struct trace_array *tr,
		       unsigned long ip, const char *fmt, ...)
{
	int ret;
	va_list ap;

	if (!tr)
		return -ENOENT;

	/* This is only allowed for created instances */
	if (tr == &global_trace)
		return 0;

	/* Honor the instance's 'printk' trace option */
	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
		return 0;

	va_start(ap, fmt);
	ret = trace_array_vprintk(tr, ip, fmt, ap);
	va_end(ap);
	return ret;
}
EXPORT_SYMBOL_GPL(trace_array_printk);
3513 
/**
 * trace_array_init_printk - Initialize buffers for trace_array_printk()
 * @tr: The trace array to initialize the buffers for
 *
 * As trace_array_printk() only writes into instances, they are OK to
 * have in the kernel (unlike trace_printk()). This needs to be called
 * before trace_array_printk() can be used on a trace_array.
 *
 * Returns 0 on success, -ENOENT if @tr is NULL, -EINVAL for the
 * top-level instance, or -ENOMEM if allocation fails.
 */
int trace_array_init_printk(struct trace_array *tr)
{
	if (!tr)
		return -ENOENT;

	/* This is only allowed for created instances */
	if (tr == &global_trace)
		return -EINVAL;

	return alloc_percpu_trace_buffer();
}
EXPORT_SYMBOL_GPL(trace_array_init_printk);
3534 
/*
 * Like trace_array_printk() but writes directly into a given ring
 * buffer, gated by the global instance's 'printk' trace option.
 */
__printf(3, 4)
int trace_array_printk_buf(struct trace_buffer *buffer,
			   unsigned long ip, const char *fmt, ...)
{
	int ret;
	va_list ap;

	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
		return 0;

	va_start(ap, fmt);
	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
	va_end(ap);
	return ret;
}
3550 
/* trace_printk() backend: write into the top-level (global) buffer */
__printf(2, 0)
int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
{
	return trace_array_vprintk(&global_trace, ip, fmt, args);
}
EXPORT_SYMBOL_GPL(trace_vprintk);
3557 
trace_iterator_increment(struct trace_iterator * iter)3558 static void trace_iterator_increment(struct trace_iterator *iter)
3559 {
3560 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3561 
3562 	iter->idx++;
3563 	if (buf_iter)
3564 		ring_buffer_iter_advance(buf_iter);
3565 }
3566 
/*
 * Peek at the next entry on @cpu without consuming it. Uses the
 * ring buffer iterator when one exists (non-consuming read mode),
 * otherwise peeks directly at the live buffer. Sets iter->ent_size
 * to the entry's length (0 when there is no entry).
 */
static struct trace_entry *
peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
		unsigned long *lost_events)
{
	struct ring_buffer_event *event;
	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);

	if (buf_iter) {
		event = ring_buffer_iter_peek(buf_iter, ts);
		if (lost_events)
			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
				(unsigned long)-1 : 0;
	} else {
		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
					 lost_events);
	}

	if (event) {
		iter->ent_size = ring_buffer_event_length(event);
		return ring_buffer_event_data(event);
	}
	iter->ent_size = 0;
	return NULL;
}
3591 
/*
 * Find the entry with the smallest timestamp across all traced CPUs
 * (or on the single CPU selected by iter->cpu_file), without consuming
 * it. On success, optionally reports the winning entry's CPU,
 * timestamp and lost-event count, and sets iter->ent_size accordingly.
 */
static struct trace_entry *
__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
		  unsigned long *missing_events, u64 *ent_ts)
{
	struct trace_buffer *buffer = iter->array_buffer->buffer;
	struct trace_entry *ent, *next = NULL;
	unsigned long lost_events = 0, next_lost = 0;
	int cpu_file = iter->cpu_file;
	u64 next_ts = 0, ts;
	int next_cpu = -1;
	int next_size = 0;
	int cpu;

	/*
	 * If we are in a per_cpu trace file, don't bother by iterating over
	 * all cpu and peek directly.
	 */
	if (cpu_file > RING_BUFFER_ALL_CPUS) {
		if (ring_buffer_empty_cpu(buffer, cpu_file))
			return NULL;
		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
		if (ent_cpu)
			*ent_cpu = cpu_file;

		return ent;
	}

	for_each_tracing_cpu(cpu) {

		if (ring_buffer_empty_cpu(buffer, cpu))
			continue;

		ent = peek_next_entry(iter, cpu, &ts, &lost_events);

		/*
		 * Pick the entry with the smallest timestamp:
		 */
		if (ent && (!next || ts < next_ts)) {
			next = ent;
			next_cpu = cpu;
			next_ts = ts;
			next_lost = lost_events;
			/* peek_next_entry() set iter->ent_size for this CPU */
			next_size = iter->ent_size;
		}
	}

	/* Restore the size of the winning entry (peeks above clobbered it) */
	iter->ent_size = next_size;

	if (ent_cpu)
		*ent_cpu = next_cpu;

	if (ent_ts)
		*ent_ts = next_ts;

	if (missing_events)
		*missing_events = next_lost;

	return next;
}
3651 
3652 #define STATIC_FMT_BUF_SIZE	128
3653 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3654 
/*
 * Grow iter->fmt by STATIC_FMT_BUF_SIZE bytes. Returns the (possibly
 * moved) buffer, or NULL when reallocation is not possible or not safe.
 */
static char *trace_iter_expand_format(struct trace_iterator *iter)
{
	char *tmp;

	/*
	 * iter->tr is NULL when used with tp_printk, which makes
	 * this get called where it is not safe to call krealloc().
	 */
	if (!iter->tr || iter->fmt == static_fmt_buf)
		return NULL;

	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
		       GFP_KERNEL);
	if (tmp) {
		/* Only commit the new size/pointer if krealloc() succeeded */
		iter->fmt_size += STATIC_FMT_BUF_SIZE;
		iter->fmt = tmp;
	}

	return tmp;
}
3675 
/*
 * Returns true if the string is safe to dereference from an event.
 *
 * @str is safe when it points into the event payload itself, the temp
 * seq buffer, core kernel rodata, a registered tracepoint string, or
 * the core section of the module that defined the event. @star/@len
 * describe a "%.*s" precision argument.
 */
static bool trace_safe_str(struct trace_iterator *iter, const char *str,
			   bool star, int len)
{
	unsigned long addr = (unsigned long)str;
	struct trace_event *trace_event;
	struct trace_event_call *event;

	/* Ignore strings with no length */
	if (star && !len)
		return true;

	/* OK if part of the event data */
	if ((addr >= (unsigned long)iter->ent) &&
	    (addr < (unsigned long)iter->ent + iter->ent_size))
		return true;

	/* OK if part of the temp seq buffer */
	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
		return true;

	/* Core rodata can not be freed */
	if (is_kernel_rodata(addr))
		return true;

	if (trace_is_tracepoint_string(str))
		return true;

	/*
	 * Now this could be a module event, referencing core module
	 * data, which is OK.
	 */
	if (!iter->ent)
		return false;

	trace_event = ftrace_find_event(iter->ent->type);
	if (!trace_event)
		return false;

	event = container_of(trace_event, struct trace_event_call, event);
	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
		return false;

	/* Would rather have rodata, but this will suffice */
	if (within_module_core(addr, event->module))
		return true;

	return false;
}
3726 
show_buffer(struct trace_seq * s)3727 static const char *show_buffer(struct trace_seq *s)
3728 {
3729 	struct seq_buf *seq = &s->seq;
3730 
3731 	seq_buf_terminate(seq);
3732 
3733 	return seq->buffer;
3734 }
3735 
3736 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3737 
/*
 * Probe whether vsnprintf() on this architecture advances the va_list
 * it is handed. Returns the value of the vararg following the one
 * "%d" consumed — nonzero (with test_can_verify()'s arguments) only
 * when the caller's va_list was advanced by reference.
 */
static int test_can_verify_check(const char *fmt, ...)
{
	char buf[16];
	va_list ap;
	int ret;

	/*
	 * The verifier is dependent on vsnprintf() modifies the va_list
	 * passed to it, where it is sent as a reference. Some architectures
	 * (like x86_32) passes it by value, which means that vsnprintf()
	 * does not modify the va_list passed to it, and the verifier
	 * would then need to be able to understand all the values that
	 * vsnprintf can use. If it is passed by value, then the verifier
	 * is disabled.
	 */
	va_start(ap, fmt);
	vsnprintf(buf, 16, "%d", ap);
	ret = va_arg(ap, int);
	va_end(ap);

	return ret;
}
3760 
/*
 * Disable the trace event string verifier when vsnprintf() does not
 * advance the caller's va_list (see test_can_verify_check()).
 */
static void test_can_verify(void)
{
	if (!test_can_verify_check("%d %d", 0, 1)) {
		pr_info("trace event string verifier disabled\n");
		static_branch_inc(&trace_no_verify);
	}
}
3768 
/**
 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
 * @iter: The iterator that holds the seq buffer and the event being printed
 * @fmt: The format used to print the event
 * @ap: The va_list holding the data to print from @fmt.
 *
 * This writes the data into the @iter->seq buffer using the data from
 * @fmt and @ap. If the format has a %s, then the source of the string
 * is examined to make sure it is safe to print, otherwise it will
 * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
 * pointer.
 */
void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
			 va_list ap)
{
	const char *p = fmt;
	const char *str;
	int i, j;

	if (WARN_ON_ONCE(!fmt))
		return;

	if (static_branch_unlikely(&trace_no_verify))
		goto print;

	/* Don't bother checking when doing a ftrace_dump() */
	if (iter->fmt == static_fmt_buf)
		goto print;

	/* Process @fmt one "%s" conversion at a time */
	while (*p) {
		bool star = false;
		int len = 0;

		j = 0;

		/* We only care about %s and variants */
		for (i = 0; p[i]; i++) {
			if (i + 1 >= iter->fmt_size) {
				/*
				 * If we can't expand the copy buffer,
				 * just print it.
				 */
				if (!trace_iter_expand_format(iter))
					goto print;
			}

			if (p[i] == '\\' && p[i+1]) {
				i++;
				continue;
			}
			if (p[i] == '%') {
				/* Need to test cases like %08.*s */
				for (j = 1; p[i+j]; j++) {
					if (isdigit(p[i+j]) ||
					    p[i+j] == '.')
						continue;
					if (p[i+j] == '*') {
						star = true;
						continue;
					}
					break;
				}
				if (p[i+j] == 's')
					break;
				star = false;
			}
			j = 0;
		}
		/* If no %s found then just print normally */
		if (!p[i])
			break;

		/* Copy up to the %s, and print that */
		strncpy(iter->fmt, p, i);
		iter->fmt[i] = '\0';
		trace_seq_vprintf(&iter->seq, iter->fmt, ap);

		/*
		 * If iter->seq is full, the above call no longer guarantees
		 * that ap is in sync with fmt processing, and further calls
		 * to va_arg() can return wrong positional arguments.
		 *
		 * Ensure that ap is no longer used in this case.
		 */
		if (iter->seq.full) {
			p = "";
			break;
		}

		/* "%.*s": the precision comes first in the va_list */
		if (star)
			len = va_arg(ap, int);

		/* The ap now points to the string data of the %s */
		str = va_arg(ap, const char *);

		/*
		 * If you hit this warning, it is likely that the
		 * trace event in question used %s on a string that
		 * was saved at the time of the event, but may not be
		 * around when the trace is read. Use __string(),
		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
		 * instead. See samples/trace_events/trace-events-sample.h
		 * for reference.
		 */
		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
			      "fmt: '%s' current_buffer: '%s'",
			      fmt, show_buffer(&iter->seq))) {
			int ret;

			/* Try to safely read the string */
			if (star) {
				if (len + 1 > iter->fmt_size)
					len = iter->fmt_size - 1;
				if (len < 0)
					len = 0;
				ret = copy_from_kernel_nofault(iter->fmt, str, len);
				iter->fmt[len] = 0;
				star = false;
			} else {
				ret = strncpy_from_kernel_nofault(iter->fmt, str,
								  iter->fmt_size);
			}
			if (ret < 0)
				trace_seq_printf(&iter->seq, "(0x%px)", str);
			else
				trace_seq_printf(&iter->seq, "(0x%px:%s)",
						 str, iter->fmt);
			str = "[UNSAFE-MEMORY]";
			strcpy(iter->fmt, "%s");
		} else {
			/* Safe: copy just the "%...s" specifier itself */
			strncpy(iter->fmt, p + i, j + 1);
			iter->fmt[j+1] = '\0';
		}
		if (star)
			trace_seq_printf(&iter->seq, iter->fmt, len, str);
		else
			trace_seq_printf(&iter->seq, iter->fmt, str);

		/* Advance past the conversion just handled */
		p += i + j + 1;
	}
 print:
	if (*p)
		trace_seq_vprintf(&iter->seq, p, ap);
}
3913 
/*
 * Return @fmt with every bare "%p" rewritten to "%px" (so pointers are
 * printed unhashed), unless the 'hash-ptr' trace option is set or
 * there is no trace array. Uses iter->fmt as scratch space and falls
 * back to returning @fmt unchanged when the buffer cannot be grown.
 */
const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
{
	const char *p, *new_fmt;
	char *q;

	if (WARN_ON_ONCE(!fmt))
		return fmt;

	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
		return fmt;

	p = fmt;
	new_fmt = q = iter->fmt;
	while (*p) {
		/* +3: worst case appends "px" plus the terminating NUL */
		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
			if (!trace_iter_expand_format(iter))
				return fmt;

			/* krealloc() may have moved the buffer; rebase q */
			q += iter->fmt - new_fmt;
			new_fmt = iter->fmt;
		}

		*q++ = *p++;

		/* Replace %p with %px */
		if (p[-1] == '%') {
			if (p[0] == '%') {
				*q++ = *p++;
			} else if (p[0] == 'p' && !isalnum(p[1])) {
				*q++ = *p++;
				*q++ = 'x';
			}
		}
	}
	*q = '\0';

	return new_fmt;
}
3952 
/*
 * Fallback buffer for trace_find_next_entry() when it runs in a context
 * where kmalloc() is not an option (see the iter->temp handling there).
 */
#define STATIC_TEMP_BUF_SIZE	128
static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3955 
/* Find the next real entry, without updating the iterator itself */
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
					  int *ent_cpu, u64 *ent_ts)
{
	/* __find_next_entry will reset ent_size */
	int ent_size = iter->ent_size;
	struct trace_entry *entry;

	/*
	 * If called from ftrace_dump(), then the iter->temp buffer
	 * will be the static_temp_buf and not created from kmalloc.
	 * If the entry size is greater than the buffer, we can
	 * not save it. Just return NULL in that case. This is only
	 * used to add markers when two consecutive events' time
	 * stamps have a large delta. See trace_print_lat_context()
	 */
	if (iter->temp == static_temp_buf &&
	    STATIC_TEMP_BUF_SIZE < ent_size)
		return NULL;

	/*
	 * The __find_next_entry() may call peek_next_entry(), which may
	 * call ring_buffer_peek() that may make the contents of iter->ent
	 * undefined. Need to copy iter->ent now.
	 */
	if (iter->ent && iter->ent != iter->temp) {
		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
			void *temp;
			/* Grow the temp buffer to fit the current entry */
			temp = kmalloc(iter->ent_size, GFP_KERNEL);
			if (!temp)
				return NULL;
			kfree(iter->temp);
			iter->temp = temp;
			iter->temp_size = iter->ent_size;
		}
		memcpy(iter->temp, iter->ent, iter->ent_size);
		iter->ent = iter->temp;
	}
	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
	/* Put back the original ent_size */
	iter->ent_size = ent_size;

	return entry;
}
4001 
4002 /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)4003 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4004 {
4005 	iter->ent = __find_next_entry(iter, &iter->cpu,
4006 				      &iter->lost_events, &iter->ts);
4007 
4008 	if (iter->ent)
4009 		trace_iterator_increment(iter);
4010 
4011 	return iter->ent ? iter : NULL;
4012 }
4013 
/*
 * Consume (remove) the next event from the ring buffer for the iterator's
 * current CPU, recording its timestamp and any lost-event count in @iter.
 */
static void trace_consume(struct trace_iterator *iter)
{
	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
			    &iter->lost_events);
}
4019 
/* seq_file .next: advance the trace iterator to entry index *pos */
static void *s_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	int target = (int)*pos;
	void *ent = iter;

	WARN_ON_ONCE(iter->leftover);

	(*pos)++;

	/* can't go backwards */
	if (iter->idx > target)
		return NULL;

	/* A fresh iterator must fetch its first entry */
	if (iter->idx < 0)
		ent = trace_find_next_entry_inc(iter);

	while (ent && iter->idx < target)
		ent = trace_find_next_entry_inc(iter);

	iter->pos = *pos;

	return ent;
}
4046 
tracing_iter_reset(struct trace_iterator * iter,int cpu)4047 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4048 {
4049 	struct ring_buffer_iter *buf_iter;
4050 	unsigned long entries = 0;
4051 	u64 ts;
4052 
4053 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4054 
4055 	buf_iter = trace_buffer_iter(iter, cpu);
4056 	if (!buf_iter)
4057 		return;
4058 
4059 	ring_buffer_iter_reset(buf_iter);
4060 
4061 	/*
4062 	 * We could have the case with the max latency tracers
4063 	 * that a reset never took place on a cpu. This is evident
4064 	 * by the timestamp being before the start of the buffer.
4065 	 */
4066 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4067 		if (ts >= iter->array_buffer->time_start)
4068 			break;
4069 		entries++;
4070 		ring_buffer_iter_advance(buf_iter);
4071 	}
4072 
4073 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4074 }
4075 
4076 /*
4077  * The current tracer is copied to avoid a global locking
4078  * all around.
4079  */
s_start(struct seq_file * m,loff_t * pos)4080 static void *s_start(struct seq_file *m, loff_t *pos)
4081 {
4082 	struct trace_iterator *iter = m->private;
4083 	struct trace_array *tr = iter->tr;
4084 	int cpu_file = iter->cpu_file;
4085 	void *p = NULL;
4086 	loff_t l = 0;
4087 	int cpu;
4088 
4089 	/*
4090 	 * copy the tracer to avoid using a global lock all around.
4091 	 * iter->trace is a copy of current_trace, the pointer to the
4092 	 * name may be used instead of a strcmp(), as iter->trace->name
4093 	 * will point to the same string as current_trace->name.
4094 	 */
4095 	mutex_lock(&trace_types_lock);
4096 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
4097 		/* Close iter->trace before switching to the new current tracer */
4098 		if (iter->trace->close)
4099 			iter->trace->close(iter);
4100 		*iter->trace = *tr->current_trace;
4101 		/* Reopen the new current tracer */
4102 		if (iter->trace->open)
4103 			iter->trace->open(iter);
4104 	}
4105 	mutex_unlock(&trace_types_lock);
4106 
4107 #ifdef CONFIG_TRACER_MAX_TRACE
4108 	if (iter->snapshot && iter->trace->use_max_tr)
4109 		return ERR_PTR(-EBUSY);
4110 #endif
4111 
4112 	if (*pos != iter->pos) {
4113 		iter->ent = NULL;
4114 		iter->cpu = 0;
4115 		iter->idx = -1;
4116 
4117 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4118 			for_each_tracing_cpu(cpu)
4119 				tracing_iter_reset(iter, cpu);
4120 		} else
4121 			tracing_iter_reset(iter, cpu_file);
4122 
4123 		iter->leftover = 0;
4124 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4125 			;
4126 
4127 	} else {
4128 		/*
4129 		 * If we overflowed the seq_file before, then we want
4130 		 * to just reuse the trace_seq buffer again.
4131 		 */
4132 		if (iter->leftover)
4133 			p = iter;
4134 		else {
4135 			l = *pos - 1;
4136 			p = s_next(m, p, &l);
4137 		}
4138 	}
4139 
4140 	trace_event_read_lock();
4141 	trace_access_lock(cpu_file);
4142 	return p;
4143 }
4144 
/* seq_file .stop: drop the locks taken by s_start() */
static void s_stop(struct seq_file *m, void *p)
{
	struct trace_iterator *iter = m->private;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* s_start() returned -EBUSY before locking in this case; nothing to undo */
	if (iter->snapshot && iter->trace->use_max_tr)
		return;
#endif

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
}
4157 
4158 static void
get_total_entries_cpu(struct array_buffer * buf,unsigned long * total,unsigned long * entries,int cpu)4159 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4160 		      unsigned long *entries, int cpu)
4161 {
4162 	unsigned long count;
4163 
4164 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4165 	/*
4166 	 * If this buffer has skipped entries, then we hold all
4167 	 * entries for the trace and we need to ignore the
4168 	 * ones before the time stamp.
4169 	 */
4170 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4171 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4172 		/* total is the same as the entries */
4173 		*total = count;
4174 	} else
4175 		*total = count +
4176 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4177 	*entries = count;
4178 }
4179 
4180 static void
get_total_entries(struct array_buffer * buf,unsigned long * total,unsigned long * entries)4181 get_total_entries(struct array_buffer *buf,
4182 		  unsigned long *total, unsigned long *entries)
4183 {
4184 	unsigned long t, e;
4185 	int cpu;
4186 
4187 	*total = 0;
4188 	*entries = 0;
4189 
4190 	for_each_tracing_cpu(cpu) {
4191 		get_total_entries_cpu(buf, &t, &e, cpu);
4192 		*total += t;
4193 		*entries += e;
4194 	}
4195 }
4196 
trace_total_entries_cpu(struct trace_array * tr,int cpu)4197 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4198 {
4199 	unsigned long total, entries;
4200 
4201 	if (!tr)
4202 		tr = &global_trace;
4203 
4204 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4205 
4206 	return entries;
4207 }
4208 
trace_total_entries(struct trace_array * tr)4209 unsigned long trace_total_entries(struct trace_array *tr)
4210 {
4211 	unsigned long total, entries;
4212 
4213 	if (!tr)
4214 		tr = &global_trace;
4215 
4216 	get_total_entries(&tr->array_buffer, &total, &entries);
4217 
4218 	return entries;
4219 }
4220 
/* Column legend for the latency-format output (see print_trace_header()) */
static void print_lat_help_header(struct seq_file *m)
{
	seq_puts(m, "#                    _------=> CPU#            \n"
		    "#                   / _-----=> irqs-off        \n"
		    "#                  | / _----=> need-resched    \n"
		    "#                  || / _---=> hardirq/softirq \n"
		    "#                  ||| / _--=> preempt-depth   \n"
		    "#                  |||| / _-=> migrate-disable \n"
		    "#                  ||||| /     delay           \n"
		    "#  cmd     pid     |||||| time  |   caller     \n"
		    "#     \\   /        ||||||  \\    |    /       \n");
}
4233 
/* Print the entries-in-buffer/entries-written summary line */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long entries, total;

	get_total_entries(buf, &total, &entries);

	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   entries, total, num_online_cpus());
	seq_puts(m, "#\n");
}
4244 
/* Basic column header; widens the TASK-PID column when TGID recording is on */
static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
				   unsigned int flags)
{
	bool tgid = flags & TRACE_ITER_RECORD_TGID;

	print_event_info(buf, m);

	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
}
4255 
/*
 * Column header including the irq/preempt flag legend; %.*s with @prec
 * widens the layout by 10 columns when TGID recording is enabled.
 */
static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
				       unsigned int flags)
{
	bool tgid = flags & TRACE_ITER_RECORD_TGID;
	const char *space = "            ";
	int prec = tgid ? 12 : 2;

	print_event_info(buf, m);

	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
}
4274 
4275 void
print_trace_header(struct seq_file * m,struct trace_iterator * iter)4276 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4277 {
4278 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4279 	struct array_buffer *buf = iter->array_buffer;
4280 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4281 	struct tracer *type = iter->trace;
4282 	unsigned long entries;
4283 	unsigned long total;
4284 	const char *name = "preemption";
4285 
4286 	name = type->name;
4287 
4288 	get_total_entries(buf, &total, &entries);
4289 
4290 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4291 		   name, UTS_RELEASE);
4292 	seq_puts(m, "# -----------------------------------"
4293 		 "---------------------------------\n");
4294 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4295 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4296 		   nsecs_to_usecs(data->saved_latency),
4297 		   entries,
4298 		   total,
4299 		   buf->cpu,
4300 #if defined(CONFIG_PREEMPT_NONE)
4301 		   "server",
4302 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4303 		   "desktop",
4304 #elif defined(CONFIG_PREEMPT)
4305 		   "preempt",
4306 #elif defined(CONFIG_PREEMPT_RT)
4307 		   "preempt_rt",
4308 #else
4309 		   "unknown",
4310 #endif
4311 		   /* These are reserved for later use */
4312 		   0, 0, 0, 0);
4313 #ifdef CONFIG_SMP
4314 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4315 #else
4316 	seq_puts(m, ")\n");
4317 #endif
4318 	seq_puts(m, "#    -----------------\n");
4319 	seq_printf(m, "#    | task: %.16s-%d "
4320 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4321 		   data->comm, data->pid,
4322 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4323 		   data->policy, data->rt_priority);
4324 	seq_puts(m, "#    -----------------\n");
4325 
4326 	if (data->critical_start) {
4327 		seq_puts(m, "#  => started at: ");
4328 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4329 		trace_print_seq(m, &iter->seq);
4330 		seq_puts(m, "\n#  => ended at:   ");
4331 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4332 		trace_print_seq(m, &iter->seq);
4333 		seq_puts(m, "\n#\n");
4334 	}
4335 
4336 	seq_puts(m, "#\n");
4337 }
4338 
/*
 * With the "annotate" option set, emit a one-line marker the first time
 * output switches to a CPU buffer that had overruns, so the reader knows
 * earlier events from that CPU may be missing.
 */
static void test_cpu_buff_start(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;
	struct trace_array *tr = iter->tr;

	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
		return;

	/* TRACE_FILE_ANNOTATE is only set when the buffer had overruns */
	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
		return;

	/* Only announce each CPU once */
	if (cpumask_available(iter->started) &&
	    cpumask_test_cpu(iter->cpu, iter->started))
		return;

	/* Entries were skipped on purpose — see tracing_iter_reset() */
	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
		return;

	if (cpumask_available(iter->started))
		cpumask_set_cpu(iter->cpu, iter->started);

	/* Don't print started cpu buffer for the first entry of the trace */
	if (iter->idx > 1)
		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
				iter->cpu);
}
4365 
/* Default (human-readable) formatting of the current entry into iter->seq */
static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	test_cpu_buff_start(iter);

	event = ftrace_find_event(entry->type);

	/* Prefix with pid/cpu/timestamp context unless suppressed */
	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
			trace_print_lat_context(iter);
		else
			trace_print_context(iter);
	}

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

	/* Hand off to the event's own formatter when one is registered */
	if (event)
		return event->funcs->trace(iter, sym_flags, event);

	trace_seq_printf(s, "Unknown type %d\n", entry->type);

	return trace_handle_return(s);
}
4397 
/* "raw" output format: numeric pid/cpu/timestamp then the event's raw form */
static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
		trace_seq_printf(s, "%d %d %llu ",
				 entry->pid, iter->cpu, iter->ts);

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

	event = ftrace_find_event(entry->type);
	if (event)
		return event->funcs->raw(iter, 0, event);

	/* No formatter registered for this event type */
	trace_seq_printf(s, "%d ?\n", entry->type);

	return trace_handle_return(s);
}
4422 
/* "hex" output format: fields dumped as hex, each line newline-terminated */
static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	unsigned char newline = '\n';
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
		SEQ_PUT_HEX_FIELD(s, entry->pid);
		SEQ_PUT_HEX_FIELD(s, iter->cpu);
		SEQ_PUT_HEX_FIELD(s, iter->ts);
		if (trace_seq_has_overflowed(s))
			return TRACE_TYPE_PARTIAL_LINE;
	}

	event = ftrace_find_event(entry->type);
	if (event) {
		enum print_line_t ret = event->funcs->hex(iter, 0, event);
		/* Propagate partial/unhandled results without the newline */
		if (ret != TRACE_TYPE_HANDLED)
			return ret;
	}

	SEQ_PUT_FIELD(s, newline);

	return trace_handle_return(s);
}
4452 
print_bin_fmt(struct trace_iterator * iter)4453 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4454 {
4455 	struct trace_array *tr = iter->tr;
4456 	struct trace_seq *s = &iter->seq;
4457 	struct trace_entry *entry;
4458 	struct trace_event *event;
4459 
4460 	entry = iter->ent;
4461 
4462 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4463 		SEQ_PUT_FIELD(s, entry->pid);
4464 		SEQ_PUT_FIELD(s, iter->cpu);
4465 		SEQ_PUT_FIELD(s, iter->ts);
4466 		if (trace_seq_has_overflowed(s))
4467 			return TRACE_TYPE_PARTIAL_LINE;
4468 	}
4469 
4470 	event = ftrace_find_event(entry->type);
4471 	return event ? event->funcs->binary(iter, 0, event) :
4472 		TRACE_TYPE_HANDLED;
4473 }
4474 
trace_empty(struct trace_iterator * iter)4475 int trace_empty(struct trace_iterator *iter)
4476 {
4477 	struct ring_buffer_iter *buf_iter;
4478 	int cpu;
4479 
4480 	/* If we are looking at one CPU buffer, only check that one */
4481 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4482 		cpu = iter->cpu_file;
4483 		buf_iter = trace_buffer_iter(iter, cpu);
4484 		if (buf_iter) {
4485 			if (!ring_buffer_iter_empty(buf_iter))
4486 				return 0;
4487 		} else {
4488 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4489 				return 0;
4490 		}
4491 		return 1;
4492 	}
4493 
4494 	for_each_tracing_cpu(cpu) {
4495 		buf_iter = trace_buffer_iter(iter, cpu);
4496 		if (buf_iter) {
4497 			if (!ring_buffer_iter_empty(buf_iter))
4498 				return 0;
4499 		} else {
4500 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4501 				return 0;
4502 		}
4503 	}
4504 
4505 	return 1;
4506 }
4507 
/*  Called with trace_event_read_lock() held. */
enum print_line_t print_trace_line(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	unsigned long trace_flags = tr->trace_flags;
	enum print_line_t ret;

	/* First report any events the ring buffer lost before this one */
	if (iter->lost_events) {
		if (iter->lost_events == (unsigned long)-1)
			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
					 iter->cpu);
		else
			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
					 iter->cpu, iter->lost_events);
		if (trace_seq_has_overflowed(&iter->seq))
			return TRACE_TYPE_PARTIAL_LINE;
	}

	/* A tracer may supply its own line formatter; it takes precedence */
	if (iter->trace && iter->trace->print_line) {
		ret = iter->trace->print_line(iter);
		if (ret != TRACE_TYPE_UNHANDLED)
			return ret;
	}

	/* printk-msgonly mode: print just the message for printk-style events */
	if (iter->ent->type == TRACE_BPUTS &&
			trace_flags & TRACE_ITER_PRINTK &&
			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
		return trace_print_bputs_msg_only(iter);

	if (iter->ent->type == TRACE_BPRINT &&
			trace_flags & TRACE_ITER_PRINTK &&
			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
		return trace_print_bprintk_msg_only(iter);

	if (iter->ent->type == TRACE_PRINT &&
			trace_flags & TRACE_ITER_PRINTK &&
			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
		return trace_print_printk_msg_only(iter);

	/* Otherwise dispatch on the selected output format option */
	if (trace_flags & TRACE_ITER_BIN)
		return print_bin_fmt(iter);

	if (trace_flags & TRACE_ITER_HEX)
		return print_hex_fmt(iter);

	if (trace_flags & TRACE_ITER_RAW)
		return print_raw_fmt(iter);

	return print_trace_fmt(iter);
}
4558 
/* Print the latency-format banner and column legend for a seq_file read */
void trace_latency_header(struct seq_file *m)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;

	/* print nothing if the buffers are empty */
	if (trace_empty(iter))
		return;

	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
		print_trace_header(m, iter);

	/* Verbose mode carries its own per-line context; skip the legend */
	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
		print_lat_help_header(m);
}
4574 
trace_default_header(struct seq_file * m)4575 void trace_default_header(struct seq_file *m)
4576 {
4577 	struct trace_iterator *iter = m->private;
4578 	struct trace_array *tr = iter->tr;
4579 	unsigned long trace_flags = tr->trace_flags;
4580 
4581 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4582 		return;
4583 
4584 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4585 		/* print nothing if the buffers are empty */
4586 		if (trace_empty(iter))
4587 			return;
4588 		print_trace_header(m, iter);
4589 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4590 			print_lat_help_header(m);
4591 	} else {
4592 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4593 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4594 				print_func_help_header_irq(iter->array_buffer,
4595 							   m, trace_flags);
4596 			else
4597 				print_func_help_header(iter->array_buffer, m,
4598 						       trace_flags);
4599 		}
4600 	}
4601 }
4602 
/* Warn the reader when the function tracer has shut itself down */
static void test_ftrace_alive(struct seq_file *m)
{
	if (ftrace_is_dead())
		seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
			    "#          MAY BE MISSING FUNCTION EVENTS\n");
}
4610 
4611 #ifdef CONFIG_TRACER_MAX_TRACE
/* Usage text for the top-level "snapshot" file (all-CPU view) */
static void show_snapshot_main_help(struct seq_file *m)
{
	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		    "#                      Takes a snapshot of the main buffer.\n"
		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		    "#                      (Doesn't have to be '2' works with any number that\n"
		    "#                       is not a '0' or '1')\n");
}
4621 
/* Usage text for a per-CPU "snapshot" file; wording depends on swap support */
static void show_snapshot_percpu_help(struct seq_file *m)
{
	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
#else
	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
		    "#                     Must use main snapshot file to allocate.\n");
#endif
	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "#                      (Doesn't have to be '2' works with any number that\n"
		    "#                       is not a '0' or '1')\n");
}
4636 
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4637 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4638 {
4639 	if (iter->tr->allocated_snapshot)
4640 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4641 	else
4642 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4643 
4644 	seq_puts(m, "# Snapshot commands:\n");
4645 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4646 		show_snapshot_main_help(m);
4647 	else
4648 		show_snapshot_percpu_help(m);
4649 }
4650 #else
4651 /* Should never be called */
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4652 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4653 #endif
4654 
/* seq_file .show: emit headers, leftover buffered output, or the next line */
static int s_show(struct seq_file *m, void *v)
{
	struct trace_iterator *iter = v;
	int ret;

	if (iter->ent == NULL) {
		/* No entry yet: this is the header portion of the read */
		if (iter->tr) {
			seq_printf(m, "# tracer: %s\n", iter->trace->name);
			seq_puts(m, "#\n");
			test_ftrace_alive(m);
		}
		if (iter->snapshot && trace_empty(iter))
			print_snapshot_help(m, iter);
		else if (iter->trace && iter->trace->print_header)
			iter->trace->print_header(m);
		else
			trace_default_header(m);

	} else if (iter->leftover) {
		/*
		 * If we filled the seq_file buffer earlier, we
		 * want to just show it now.
		 */
		ret = trace_print_seq(m, &iter->seq);

		/* ret should this time be zero, but you never know */
		iter->leftover = ret;

	} else {
		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/* The line did not fit in the trace_seq at all */
			iter->seq.full = 0;
			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
		}
		ret = trace_print_seq(m, &iter->seq);
		/*
		 * If we overflow the seq_file buffer, then it will
		 * ask us for this data again at start up.
		 * Use that instead.
		 *  ret is 0 if seq_file write succeeded.
		 *        -1 otherwise.
		 */
		iter->leftover = ret;
	}

	return 0;
}
4702 
4703 /*
4704  * Should be used after trace_array_get(), trace_types_lock
4705  * ensures that i_cdev was already initialized.
4706  */
tracing_get_cpu(struct inode * inode)4707 static inline int tracing_get_cpu(struct inode *inode)
4708 {
4709 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4710 		return (long)inode->i_cdev - 1;
4711 	return RING_BUFFER_ALL_CPUS;
4712 }
4713 
/* seq_file hooks backing reads of the "trace" file */
static const struct seq_operations tracer_seq_ops = {
	.start		= s_start,
	.next		= s_next,
	.stop		= s_stop,
	.show		= s_show,
};
4720 
/*
 * Allocate and initialize a trace_iterator for reading the "trace" (or
 * "snapshot") file: sets up per-CPU ring buffer iterators, copies the
 * current tracer, and optionally pauses tracing while the file is open.
 * Returns the iterator or an ERR_PTR on failure.
 */
static struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;

	if (tracing_disabled)
		return ERR_PTR(-ENODEV);

	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
	if (!iter)
		return ERR_PTR(-ENOMEM);

	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
				    GFP_KERNEL);
	if (!iter->buffer_iter)
		goto release;

	/*
	 * trace_find_next_entry() may need to save off iter->ent.
	 * It will place it into the iter->temp buffer. As most
	 * events are less than 128, allocate a buffer of that size.
	 * If one is greater, then trace_find_next_entry() will
	 * allocate a new buffer to adjust for the bigger iter->ent.
	 * It's not critical if it fails to get allocated here.
	 */
	iter->temp = kmalloc(128, GFP_KERNEL);
	if (iter->temp)
		iter->temp_size = 128;

	/*
	 * trace_event_printf() may need to modify given format
	 * string to replace %p with %px so that it shows real address
	 * instead of hash value. However, that is only for the event
	 * tracing, other tracer may not need. Defer the allocation
	 * until it is needed.
	 */
	iter->fmt = NULL;
	iter->fmt_size = 0;

	/*
	 * We make a copy of the current tracer to avoid concurrent
	 * changes on it while we are reading.
	 */
	mutex_lock(&trace_types_lock);
	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
	if (!iter->trace)
		goto fail;

	*iter->trace = *tr->current_trace;

	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
		goto fail;

	iter->tr = tr;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* Currently only the top directory has a snapshot */
	if (tr->current_trace->print_max || snapshot)
		iter->array_buffer = &tr->max_buffer;
	else
#endif
		iter->array_buffer = &tr->array_buffer;
	iter->snapshot = snapshot;
	iter->pos = -1;
	iter->cpu_file = tracing_get_cpu(inode);
	mutex_init(&iter->mutex);

	/* Notify the tracer early; before we stop tracing. */
	if (iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/*
	 * If pause-on-trace is enabled, then stop the trace while
	 * dumping, unless this is the "snapshot" file
	 */
	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
		tracing_stop_tr(tr);

	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			iter->buffer_iter[cpu] =
				ring_buffer_read_prepare(iter->array_buffer->buffer,
							 cpu, GFP_KERNEL);
		}
		ring_buffer_read_prepare_sync();
		for_each_tracing_cpu(cpu) {
			ring_buffer_read_start(iter->buffer_iter[cpu]);
			tracing_iter_reset(iter, cpu);
		}
	} else {
		cpu = iter->cpu_file;
		iter->buffer_iter[cpu] =
			ring_buffer_read_prepare(iter->array_buffer->buffer,
						 cpu, GFP_KERNEL);
		ring_buffer_read_prepare_sync();
		ring_buffer_read_start(iter->buffer_iter[cpu]);
		tracing_iter_reset(iter, cpu);
	}

	mutex_unlock(&trace_types_lock);

	return iter;

 fail:
	mutex_unlock(&trace_types_lock);
	kfree(iter->trace);
	kfree(iter->temp);
	kfree(iter->buffer_iter);
release:
	seq_release_private(inode, file);
	return ERR_PTR(-ENOMEM);
}
4843 
tracing_open_generic(struct inode * inode,struct file * filp)4844 int tracing_open_generic(struct inode *inode, struct file *filp)
4845 {
4846 	int ret;
4847 
4848 	ret = tracing_check_open_get_tr(NULL);
4849 	if (ret)
4850 		return ret;
4851 
4852 	filp->private_data = inode->i_private;
4853 	return 0;
4854 }
4855 
tracing_is_disabled(void)4856 bool tracing_is_disabled(void)
4857 {
4858 	return (tracing_disabled) ? true: false;
4859 }
4860 
4861 /*
4862  * Open and update trace_array ref count.
4863  * Must have the current trace_array passed to it.
4864  */
tracing_open_generic_tr(struct inode * inode,struct file * filp)4865 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4866 {
4867 	struct trace_array *tr = inode->i_private;
4868 	int ret;
4869 
4870 	ret = tracing_check_open_get_tr(tr);
4871 	if (ret)
4872 		return ret;
4873 
4874 	filp->private_data = inode->i_private;
4875 
4876 	return 0;
4877 }
4878 
4879 /*
4880  * The private pointer of the inode is the trace_event_file.
4881  * Update the tr ref count associated to it.
4882  */
tracing_open_file_tr(struct inode * inode,struct file * filp)4883 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4884 {
4885 	struct trace_event_file *file = inode->i_private;
4886 	int ret;
4887 
4888 	ret = tracing_check_open_get_tr(file->tr);
4889 	if (ret)
4890 		return ret;
4891 
4892 	filp->private_data = inode->i_private;
4893 
4894 	return 0;
4895 }
4896 
tracing_release_file_tr(struct inode * inode,struct file * filp)4897 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4898 {
4899 	struct trace_event_file *file = inode->i_private;
4900 
4901 	trace_array_put(file->tr);
4902 
4903 	return 0;
4904 }
4905 
/*
 * ->release() for single_open()-style files whose inode carries a
 * trace_event_file: drop the tr reference, then free the seq_file.
 */
int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
{
	tracing_release_file_tr(inode, filp);

	return single_release(inode, filp);
}
4911 
/*
 * ->open() for the trace marker: mark the file as a stream (no f_pos
 * seeking semantics), then do the generic per-array open.
 */
static int tracing_mark_open(struct inode *inode, struct file *filp)
{
	stream_open(inode, filp);

	return tracing_open_generic_tr(inode, filp);
}
4917 
/*
 * ->release() paired with tracing_open(): tear down the iterator built
 * by __tracing_open(), restart tracing if it had been stopped, and drop
 * the trace_array reference taken at open time.
 */
static int tracing_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct seq_file *m = file->private_data;
	struct trace_iterator *iter;
	int cpu;

	/* Write-only opens took only a tr reference; nothing else to undo. */
	if (!(file->f_mode & FMODE_READ)) {
		trace_array_put(tr);
		return 0;
	}

	/* Writes do not use seq_file */
	iter = m->private;
	mutex_lock(&trace_types_lock);

	/* Finish the per-cpu ring buffer iterators set up at open time. */
	for_each_tracing_cpu(cpu) {
		if (iter->buffer_iter[cpu])
			ring_buffer_read_finish(iter->buffer_iter[cpu]);
	}

	/* Let the tracer run its own close hook, if it has one. */
	if (iter->trace && iter->trace->close)
		iter->trace->close(iter);

	if (!iter->snapshot && tr->stop_count)
		/* reenable tracing if it was previously enabled */
		tracing_start_tr(tr);

	__trace_array_put(tr);

	mutex_unlock(&trace_types_lock);

	/* Free everything __tracing_open() allocated for the iterator. */
	mutex_destroy(&iter->mutex);
	free_cpumask_var(iter->started);
	kfree(iter->fmt);
	kfree(iter->temp);
	kfree(iter->trace);
	kfree(iter->buffer_iter);
	seq_release_private(inode, file);

	return 0;
}
4960 
tracing_release_generic_tr(struct inode * inode,struct file * file)4961 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4962 {
4963 	struct trace_array *tr = inode->i_private;
4964 
4965 	trace_array_put(tr);
4966 	return 0;
4967 }
4968 
tracing_single_release_tr(struct inode * inode,struct file * file)4969 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4970 {
4971 	struct trace_array *tr = inode->i_private;
4972 
4973 	trace_array_put(tr);
4974 
4975 	return single_release(inode, file);
4976 }
4977 
/*
 * ->open() for the "trace" file.  Opening with O_TRUNC erases the
 * buffer (one CPU or all, depending on the inode); opening for read
 * builds the full iterator via __tracing_open().
 */
static int tracing_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int ret;

	/* Takes a reference on @tr that must be dropped on failure. */
	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/* If this file was open for write, then erase contents */
	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		int cpu = tracing_get_cpu(inode);
		struct array_buffer *trace_buf = &tr->array_buffer;

#ifdef CONFIG_TRACER_MAX_TRACE
		/* Tracers that expose a max buffer get that one reset. */
		if (tr->current_trace->print_max)
			trace_buf = &tr->max_buffer;
#endif

		if (cpu == RING_BUFFER_ALL_CPUS)
			tracing_reset_online_cpus(trace_buf);
		else
			tracing_reset_cpu(trace_buf, cpu);
	}

	if (file->f_mode & FMODE_READ) {
		/* false: not a snapshot open (cf. iter->snapshot above) */
		iter = __tracing_open(inode, file, false);
		if (IS_ERR(iter))
			ret = PTR_ERR(iter);
		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
			iter->iter_flags |= TRACE_FILE_LAT_FMT;
	}

	/* ret is still 0 here for a successful write-only open. */
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
5017 
5018 /*
5019  * Some tracers are not suitable for instance buffers.
5020  * A tracer is always available for the global array (toplevel)
5021  * or if it explicitly states that it is.
5022  */
5023 static bool
trace_ok_for_array(struct tracer * t,struct trace_array * tr)5024 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5025 {
5026 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5027 }
5028 
5029 /* Find the next tracer that this trace array may use */
5030 static struct tracer *
get_tracer_for_array(struct trace_array * tr,struct tracer * t)5031 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5032 {
5033 	while (t && !trace_ok_for_array(t, tr))
5034 		t = t->next;
5035 
5036 	return t;
5037 }
5038 
5039 static void *
t_next(struct seq_file * m,void * v,loff_t * pos)5040 t_next(struct seq_file *m, void *v, loff_t *pos)
5041 {
5042 	struct trace_array *tr = m->private;
5043 	struct tracer *t = v;
5044 
5045 	(*pos)++;
5046 
5047 	if (t)
5048 		t = get_tracer_for_array(tr, t->next);
5049 
5050 	return t;
5051 }
5052 
/*
 * seq_file ->start(): take trace_types_lock (released in t_stop())
 * and walk the tracer list up to position *pos.
 */
static void *t_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct tracer *t;
	loff_t l;

	mutex_lock(&trace_types_lock);

	t = get_tracer_for_array(tr, trace_types);
	for (l = 0; t && l < *pos; t = t_next(m, t, &l))
		;

	return t;
}
5067 
/* seq_file ->stop(): drop the lock taken in t_start(). */
static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&trace_types_lock);
}
5072 
t_show(struct seq_file * m,void * v)5073 static int t_show(struct seq_file *m, void *v)
5074 {
5075 	struct tracer *t = v;
5076 
5077 	if (!t)
5078 		return 0;
5079 
5080 	seq_puts(m, t->name);
5081 	if (t->next)
5082 		seq_putc(m, ' ');
5083 	else
5084 		seq_putc(m, '\n');
5085 
5086 	return 0;
5087 }
5088 
/* seq_file operations used by show_traces_open() to list tracers. */
static const struct seq_operations show_traces_seq_ops = {
	.start		= t_start,
	.next		= t_next,
	.stop		= t_stop,
	.show		= t_show,
};
5095 
show_traces_open(struct inode * inode,struct file * file)5096 static int show_traces_open(struct inode *inode, struct file *file)
5097 {
5098 	struct trace_array *tr = inode->i_private;
5099 	struct seq_file *m;
5100 	int ret;
5101 
5102 	ret = tracing_check_open_get_tr(tr);
5103 	if (ret)
5104 		return ret;
5105 
5106 	ret = seq_open(file, &show_traces_seq_ops);
5107 	if (ret) {
5108 		trace_array_put(tr);
5109 		return ret;
5110 	}
5111 
5112 	m = file->private_data;
5113 	m->private = tr;
5114 
5115 	return 0;
5116 }
5117 
show_traces_release(struct inode * inode,struct file * file)5118 static int show_traces_release(struct inode *inode, struct file *file)
5119 {
5120 	struct trace_array *tr = inode->i_private;
5121 
5122 	trace_array_put(tr);
5123 	return seq_release(inode, file);
5124 }
5125 
/*
 * Accept and discard writes to the "trace" file; the actual effect of
 * writing (erasing the buffer) happens at open time via O_TRUNC in
 * tracing_open().
 */
static ssize_t
tracing_write_stub(struct file *filp, const char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	return count;
}
5132 
/**
 * tracing_lseek - ->llseek() shared by several tracing files
 * @file:   the file being seeked on
 * @offset: requested offset
 * @whence: SEEK_SET / SEEK_CUR / SEEK_END
 *
 * Readable files get normal seq_file seeking; files opened write-only
 * have no meaningful position, so it is simply reset to zero.
 *
 * Fix: use loff_t for the intermediate result.  seq_lseek() returns
 * loff_t, and funneling it through an int truncated positions beyond
 * 2 GiB (and could mangle them into bogus negative values).
 */
loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
{
	loff_t ret;

	if (file->f_mode & FMODE_READ)
		ret = seq_lseek(file, offset, whence);
	else
		file->f_pos = ret = 0;

	return ret;
}
5144 
/* File operations for the "trace" file (see tracing_open()). */
static const struct file_operations tracing_fops = {
	.open		= tracing_open,
	.read		= seq_read,
	.read_iter	= seq_read_iter,
	.splice_read	= generic_file_splice_read,
	.write		= tracing_write_stub,
	.llseek		= tracing_lseek,
	.release	= tracing_release,
};
5154 
/* File operations for the tracer listing (see show_traces_open()). */
static const struct file_operations show_traces_fops = {
	.open		= show_traces_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= show_traces_release,
};
5161 
5162 static ssize_t
tracing_cpumask_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)5163 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5164 		     size_t count, loff_t *ppos)
5165 {
5166 	struct trace_array *tr = file_inode(filp)->i_private;
5167 	char *mask_str;
5168 	int len;
5169 
5170 	len = snprintf(NULL, 0, "%*pb\n",
5171 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5172 	mask_str = kmalloc(len, GFP_KERNEL);
5173 	if (!mask_str)
5174 		return -ENOMEM;
5175 
5176 	len = snprintf(mask_str, len, "%*pb\n",
5177 		       cpumask_pr_args(tr->tracing_cpumask));
5178 	if (len >= count) {
5179 		count = -EINVAL;
5180 		goto out_err;
5181 	}
5182 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5183 
5184 out_err:
5185 	kfree(mask_str);
5186 
5187 	return count;
5188 }
5189 
/**
 * tracing_set_cpumask - update which CPUs are traced for this array
 * @tr:                  trace array to update
 * @tracing_cpumask_new: new mask of CPUs to trace
 *
 * CPUs leaving the mask get their per-cpu recording disabled, CPUs
 * entering it get recording re-enabled, then the new mask is copied
 * into @tr.  Returns 0 on success, -EINVAL if @tr is NULL.
 */
int tracing_set_cpumask(struct trace_array *tr,
			cpumask_var_t tracing_cpumask_new)
{
	int cpu;

	if (!tr)
		return -EINVAL;

	/* IRQs off + tr->max_lock: flip the mask atomically w.r.t. its users */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	for_each_tracing_cpu(cpu) {
		/*
		 * Increase/decrease the disabled counter if we are
		 * about to flip a bit in the cpumask:
		 */
		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_MAX_TRACE
			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
#endif
		}
		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_MAX_TRACE
			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
#endif
		}
	}
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);

	return 0;
}
5229 
5230 static ssize_t
tracing_cpumask_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5231 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5232 		      size_t count, loff_t *ppos)
5233 {
5234 	struct trace_array *tr = file_inode(filp)->i_private;
5235 	cpumask_var_t tracing_cpumask_new;
5236 	int err;
5237 
5238 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5239 		return -ENOMEM;
5240 
5241 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5242 	if (err)
5243 		goto err_free;
5244 
5245 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5246 	if (err)
5247 		goto err_free;
5248 
5249 	free_cpumask_var(tracing_cpumask_new);
5250 
5251 	return count;
5252 
5253 err_free:
5254 	free_cpumask_var(tracing_cpumask_new);
5255 
5256 	return err;
5257 }
5258 
/* File operations for the tracing cpumask file. */
static const struct file_operations tracing_cpumask_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_cpumask_read,
	.write		= tracing_cpumask_write,
	.release	= tracing_release_generic_tr,
	.llseek		= generic_file_llseek,
};
5266 
tracing_trace_options_show(struct seq_file * m,void * v)5267 static int tracing_trace_options_show(struct seq_file *m, void *v)
5268 {
5269 	struct tracer_opt *trace_opts;
5270 	struct trace_array *tr = m->private;
5271 	u32 tracer_flags;
5272 	int i;
5273 
5274 	mutex_lock(&trace_types_lock);
5275 	tracer_flags = tr->current_trace->flags->val;
5276 	trace_opts = tr->current_trace->flags->opts;
5277 
5278 	for (i = 0; trace_options[i]; i++) {
5279 		if (tr->trace_flags & (1 << i))
5280 			seq_printf(m, "%s\n", trace_options[i]);
5281 		else
5282 			seq_printf(m, "no%s\n", trace_options[i]);
5283 	}
5284 
5285 	for (i = 0; trace_opts[i].name; i++) {
5286 		if (tracer_flags & trace_opts[i].bit)
5287 			seq_printf(m, "%s\n", trace_opts[i].name);
5288 		else
5289 			seq_printf(m, "no%s\n", trace_opts[i].name);
5290 	}
5291 	mutex_unlock(&trace_types_lock);
5292 
5293 	return 0;
5294 }
5295 
__set_tracer_option(struct trace_array * tr,struct tracer_flags * tracer_flags,struct tracer_opt * opts,int neg)5296 static int __set_tracer_option(struct trace_array *tr,
5297 			       struct tracer_flags *tracer_flags,
5298 			       struct tracer_opt *opts, int neg)
5299 {
5300 	struct tracer *trace = tracer_flags->trace;
5301 	int ret;
5302 
5303 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5304 	if (ret)
5305 		return ret;
5306 
5307 	if (neg)
5308 		tracer_flags->val &= ~opts->bit;
5309 	else
5310 		tracer_flags->val |= opts->bit;
5311 	return 0;
5312 }
5313 
5314 /* Try to assign a tracer specific option */
set_tracer_option(struct trace_array * tr,char * cmp,int neg)5315 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5316 {
5317 	struct tracer *trace = tr->current_trace;
5318 	struct tracer_flags *tracer_flags = trace->flags;
5319 	struct tracer_opt *opts = NULL;
5320 	int i;
5321 
5322 	for (i = 0; tracer_flags->opts[i].name; i++) {
5323 		opts = &tracer_flags->opts[i];
5324 
5325 		if (strcmp(cmp, opts->name) == 0)
5326 			return __set_tracer_option(tr, trace->flags, opts, neg);
5327 	}
5328 
5329 	return -EINVAL;
5330 }
5331 
5332 /* Some tracers require overwrite to stay enabled */
trace_keep_overwrite(struct tracer * tracer,u32 mask,int set)5333 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5334 {
5335 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5336 		return -1;
5337 
5338 	return 0;
5339 }
5340 
/*
 * set_tracer_flag - set or clear one global trace option bit
 * @tr:      trace array to modify
 * @mask:    single TRACE_ITER_* bit to change
 * @enabled: non-zero to set, zero to clear
 *
 * Returns 0 on success, -EINVAL if the current tracer vetoes the
 * change, -ENOMEM if the tgid map cannot be allocated.
 */
int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
{
	int *map;

	if ((mask == TRACE_ITER_RECORD_TGID) ||
	    (mask == TRACE_ITER_RECORD_CMD))
		lockdep_assert_held(&event_mutex);

	/* do nothing if flag is already set */
	if (!!(tr->trace_flags & mask) == !!enabled)
		return 0;

	/* Give the tracer a chance to approve the change */
	if (tr->current_trace->flag_changed)
		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
			return -EINVAL;

	if (enabled)
		tr->trace_flags |= mask;
	else
		tr->trace_flags &= ~mask;

	if (mask == TRACE_ITER_RECORD_CMD)
		trace_event_enable_cmd_record(enabled);

	if (mask == TRACE_ITER_RECORD_TGID) {
		/* Lazily allocate the tgid lookup table, sized by pid_max. */
		if (!tgid_map) {
			tgid_map_max = pid_max;
			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
				       GFP_KERNEL);

			/*
			 * Pairs with smp_load_acquire() in
			 * trace_find_tgid_ptr() to ensure that if it observes
			 * the tgid_map we just allocated then it also observes
			 * the corresponding tgid_map_max value.
			 */
			smp_store_release(&tgid_map, map);
		}
		/* Allocation may have failed (map == NULL): back the flag out. */
		if (!tgid_map) {
			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
			return -ENOMEM;
		}

		trace_event_enable_tgid_record(enabled);
	}

	if (mask == TRACE_ITER_EVENT_FORK)
		trace_event_follow_fork(tr, enabled);

	if (mask == TRACE_ITER_FUNC_FORK)
		ftrace_pid_follow_fork(tr, enabled);

	if (mask == TRACE_ITER_OVERWRITE) {
		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
#ifdef CONFIG_TRACER_MAX_TRACE
		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
#endif
	}

	if (mask == TRACE_ITER_PRINTK) {
		trace_printk_start_stop_comm(enabled);
		trace_printk_control(enabled);
	}

	return 0;
}
5408 
/*
 * trace_set_options - parse one option token ("opt" or "noopt") and apply it
 * @tr:     trace array to act on
 * @option: writable NUL-terminated token, possibly with surrounding
 *          whitespace (strstrip modifies it in place)
 *
 * Tries the global trace_options names first, falling back to the
 * current tracer's private options.  Returns 0 on success, negative
 * errno on failure.
 */
int trace_set_options(struct trace_array *tr, char *option)
{
	char *cmp;
	int neg = 0;
	int ret;
	size_t orig_len = strlen(option);
	int len;

	cmp = strstrip(option);

	/* A "no" prefix means: clear the option instead of setting it. */
	len = str_has_prefix(cmp, "no");
	if (len)
		neg = 1;

	cmp += len;

	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);

	ret = match_string(trace_options, -1, cmp);
	/* If no option could be set, test the specific tracer options */
	if (ret < 0)
		ret = set_tracer_option(tr, cmp, neg);
	else
		ret = set_tracer_flag(tr, 1 << ret, !neg);

	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	/*
	 * If the first trailing whitespace is replaced with '\0' by strstrip,
	 * turn it back into a space.
	 */
	if (orig_len > strlen(option))
		option[strlen(option)] = ' ';

	return ret;
}
5447 
/*
 * Apply each comma-separated option stored in trace_boot_options_buf
 * (NOTE(review): presumably captured from the boot command line — the
 * buffer's setup is outside this view).  strsep() NUL-terminates each
 * token in place; the comma is restored afterwards so the buffer can
 * be parsed again later.
 */
static void __init apply_trace_boot_options(void)
{
	char *buf = trace_boot_options_buf;
	char *option;

	while (true) {
		option = strsep(&buf, ",");

		if (!option)
			break;

		/* Skip empty tokens (leading/doubled commas). */
		if (*option)
			trace_set_options(&global_trace, option);

		/* Put back the comma to allow this to be called again */
		if (buf)
			*(buf - 1) = ',';
	}
}
5467 
5468 static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5469 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5470 			size_t cnt, loff_t *ppos)
5471 {
5472 	struct seq_file *m = filp->private_data;
5473 	struct trace_array *tr = m->private;
5474 	char buf[64];
5475 	int ret;
5476 
5477 	if (cnt >= sizeof(buf))
5478 		return -EINVAL;
5479 
5480 	if (copy_from_user(buf, ubuf, cnt))
5481 		return -EFAULT;
5482 
5483 	buf[cnt] = 0;
5484 
5485 	ret = trace_set_options(tr, buf);
5486 	if (ret < 0)
5487 		return ret;
5488 
5489 	*ppos += cnt;
5490 
5491 	return cnt;
5492 }
5493 
tracing_trace_options_open(struct inode * inode,struct file * file)5494 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5495 {
5496 	struct trace_array *tr = inode->i_private;
5497 	int ret;
5498 
5499 	ret = tracing_check_open_get_tr(tr);
5500 	if (ret)
5501 		return ret;
5502 
5503 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5504 	if (ret < 0)
5505 		trace_array_put(tr);
5506 
5507 	return ret;
5508 }
5509 
/* File operations for the trace options file. */
static const struct file_operations tracing_iter_fops = {
	.open		= tracing_trace_options_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_trace_options_write,
};
5517 
5518 static const char readme_msg[] =
5519 	"tracing mini-HOWTO:\n\n"
5520 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5521 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5522 	" Important files:\n"
5523 	"  trace\t\t\t- The static contents of the buffer\n"
5524 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5525 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5526 	"  current_tracer\t- function and latency tracers\n"
5527 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5528 	"  error_log\t- error log for failed commands (that support it)\n"
5529 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5530 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5531 	"  trace_clock\t\t-change the clock used to order events\n"
5532 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5533 	"      global:   Synced across CPUs but slows tracing down.\n"
5534 	"     counter:   Not a clock, but just an increment\n"
5535 	"      uptime:   Jiffy counter from time of boot\n"
5536 	"        perf:   Same clock that perf events use\n"
5537 #ifdef CONFIG_X86_64
5538 	"     x86-tsc:   TSC cycle counter\n"
5539 #endif
5540 	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
5541 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5542 	"    absolute:   Absolute (standalone) timestamp\n"
5543 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5544 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5545 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5546 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5547 	"\t\t\t  Remove sub-buffer with rmdir\n"
5548 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5549 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5550 	"\t\t\t  option name\n"
5551 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5552 #ifdef CONFIG_DYNAMIC_FTRACE
5553 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5554 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5555 	"\t\t\t  functions\n"
5556 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5557 	"\t     modules: Can select a group via module\n"
5558 	"\t      Format: :mod:<module-name>\n"
5559 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5560 	"\t    triggers: a command to perform when function is hit\n"
5561 	"\t      Format: <function>:<trigger>[:count]\n"
5562 	"\t     trigger: traceon, traceoff\n"
5563 	"\t\t      enable_event:<system>:<event>\n"
5564 	"\t\t      disable_event:<system>:<event>\n"
5565 #ifdef CONFIG_STACKTRACE
5566 	"\t\t      stacktrace\n"
5567 #endif
5568 #ifdef CONFIG_TRACER_SNAPSHOT
5569 	"\t\t      snapshot\n"
5570 #endif
5571 	"\t\t      dump\n"
5572 	"\t\t      cpudump\n"
5573 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5574 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5575 	"\t     The first one will disable tracing every time do_fault is hit\n"
5576 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5577 	"\t       The first time do trap is hit and it disables tracing, the\n"
5578 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5579 	"\t       the counter will not decrement. It only decrements when the\n"
5580 	"\t       trigger did work\n"
5581 	"\t     To remove trigger without count:\n"
5582 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5583 	"\t     To remove trigger with a count:\n"
5584 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5585 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5586 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5587 	"\t    modules: Can select a group via module command :mod:\n"
5588 	"\t    Does not accept triggers\n"
5589 #endif /* CONFIG_DYNAMIC_FTRACE */
5590 #ifdef CONFIG_FUNCTION_TRACER
5591 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5592 	"\t\t    (function)\n"
5593 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5594 	"\t\t    (function)\n"
5595 #endif
5596 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5597 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5598 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5599 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5600 #endif
5601 #ifdef CONFIG_TRACER_SNAPSHOT
5602 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5603 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5604 	"\t\t\t  information\n"
5605 #endif
5606 #ifdef CONFIG_STACK_TRACER
5607 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5608 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5609 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5610 	"\t\t\t  new trace)\n"
5611 #ifdef CONFIG_DYNAMIC_FTRACE
5612 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5613 	"\t\t\t  traces\n"
5614 #endif
5615 #endif /* CONFIG_STACK_TRACER */
5616 #ifdef CONFIG_DYNAMIC_EVENTS
5617 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5618 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5619 #endif
5620 #ifdef CONFIG_KPROBE_EVENTS
5621 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5622 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5623 #endif
5624 #ifdef CONFIG_UPROBE_EVENTS
5625 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5626 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5627 #endif
5628 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5629 	"\t  accepts: event-definitions (one definition per line)\n"
5630 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5631 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5632 #ifdef CONFIG_HIST_TRIGGERS
5633 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5634 #endif
5635 	"\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5636 	"\t           -:[<group>/]<event>\n"
5637 #ifdef CONFIG_KPROBE_EVENTS
5638 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5639   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5640 #endif
5641 #ifdef CONFIG_UPROBE_EVENTS
5642   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5643 #endif
5644 	"\t     args: <name>=fetcharg[:type]\n"
5645 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5646 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5647 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5648 #else
5649 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5650 #endif
5651 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5652 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5653 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5654 	"\t           symstr, <type>\\[<array-size>\\]\n"
5655 #ifdef CONFIG_HIST_TRIGGERS
5656 	"\t    field: <stype> <name>;\n"
5657 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5658 	"\t           [unsigned] char/int/long\n"
5659 #endif
5660 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5661 	"\t            of the <attached-group>/<attached-event>.\n"
5662 #endif
5663 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5664 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5665 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5666 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5667 	"\t\t\t  events\n"
5668 	"      filter\t\t- If set, only events passing filter are traced\n"
5669 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5670 	"\t\t\t  <event>:\n"
5671 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5672 	"      filter\t\t- If set, only events passing filter are traced\n"
5673 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5674 	"\t    Format: <trigger>[:count][if <filter>]\n"
5675 	"\t   trigger: traceon, traceoff\n"
5676 	"\t            enable_event:<system>:<event>\n"
5677 	"\t            disable_event:<system>:<event>\n"
5678 #ifdef CONFIG_HIST_TRIGGERS
5679 	"\t            enable_hist:<system>:<event>\n"
5680 	"\t            disable_hist:<system>:<event>\n"
5681 #endif
5682 #ifdef CONFIG_STACKTRACE
5683 	"\t\t    stacktrace\n"
5684 #endif
5685 #ifdef CONFIG_TRACER_SNAPSHOT
5686 	"\t\t    snapshot\n"
5687 #endif
5688 #ifdef CONFIG_HIST_TRIGGERS
5689 	"\t\t    hist (see below)\n"
5690 #endif
5691 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5692 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5693 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5694 	"\t                  events/block/block_unplug/trigger\n"
5695 	"\t   The first disables tracing every time block_unplug is hit.\n"
5696 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5697 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5698 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5699 	"\t   Like function triggers, the counter is only decremented if it\n"
5700 	"\t    enabled or disabled tracing.\n"
5701 	"\t   To remove a trigger without a count:\n"
5702 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5703 	"\t   To remove a trigger with a count:\n"
5704 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5705 	"\t   Filters can be ignored when removing a trigger.\n"
5706 #ifdef CONFIG_HIST_TRIGGERS
5707 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5708 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5709 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5710 	"\t            [:values=<field1[,field2,...]>]\n"
5711 	"\t            [:sort=<field1[,field2,...]>]\n"
5712 	"\t            [:size=#entries]\n"
5713 	"\t            [:pause][:continue][:clear]\n"
5714 	"\t            [:name=histname1]\n"
5715 	"\t            [:<handler>.<action>]\n"
5716 	"\t            [if <filter>]\n\n"
5717 	"\t    Note, special fields can be used as well:\n"
5718 	"\t            common_timestamp - to record current timestamp\n"
5719 	"\t            common_cpu - to record the CPU the event happened on\n"
5720 	"\n"
5721 	"\t    A hist trigger variable can be:\n"
5722 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5723 	"\t        - a reference to another variable e.g. y=$x,\n"
5724 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5725 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5726 	"\n"
5727 	"\t    hist trigger aritmethic expressions support addition(+), subtraction(-),\n"
5728 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5729 	"\t    variable reference, field or numeric literal.\n"
5730 	"\n"
5731 	"\t    When a matching event is hit, an entry is added to a hash\n"
5732 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5733 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5734 	"\t    correspond to fields in the event's format description.  Keys\n"
5735 	"\t    can be any field, or the special string 'stacktrace'.\n"
5736 	"\t    Compound keys consisting of up to two fields can be specified\n"
5737 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5738 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5739 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5740 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5741 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5742 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5743 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5744 	"\t    its histogram data will be shared with other triggers of the\n"
5745 	"\t    same name, and trigger hits will update this common data.\n\n"
5746 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5747 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5748 	"\t    triggers attached to an event, there will be a table for each\n"
5749 	"\t    trigger in the output.  The table displayed for a named\n"
5750 	"\t    trigger will be the same as any other instance having the\n"
5751 	"\t    same name.  The default format used to display a given field\n"
5752 	"\t    can be modified by appending any of the following modifiers\n"
5753 	"\t    to the field name, as applicable:\n\n"
5754 	"\t            .hex        display a number as a hex value\n"
5755 	"\t            .sym        display an address as a symbol\n"
5756 	"\t            .sym-offset display an address as a symbol and offset\n"
5757 	"\t            .execname   display a common_pid as a program name\n"
5758 	"\t            .syscall    display a syscall id as a syscall name\n"
5759 	"\t            .log2       display log2 value rather than raw number\n"
5760 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5761 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5762 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5763 	"\t    trigger or to start a hist trigger but not log any events\n"
5764 	"\t    until told to do so.  'continue' can be used to start or\n"
5765 	"\t    restart a paused hist trigger.\n\n"
5766 	"\t    The 'clear' parameter will clear the contents of a running\n"
5767 	"\t    hist trigger and leave its current paused/active state\n"
5768 	"\t    unchanged.\n\n"
5769 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5770 	"\t    have one event conditionally start and stop another event's\n"
5771 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5772 	"\t    the enable_event and disable_event triggers.\n\n"
5773 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5774 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5775 	"\t        <handler>.<action>\n\n"
5776 	"\t    The available handlers are:\n\n"
5777 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5778 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5779 	"\t        onchange(var)            - invoke action if var changes\n\n"
5780 	"\t    The available actions are:\n\n"
5781 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5782 	"\t        save(field,...)                      - save current event fields\n"
5783 #ifdef CONFIG_TRACER_SNAPSHOT
5784 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5785 #endif
5786 #ifdef CONFIG_SYNTH_EVENTS
5787 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5788 	"\t  Write into this file to define/undefine new synthetic events.\n"
5789 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5790 #endif
5791 #endif
5792 ;
5793 
5794 static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5795 tracing_readme_read(struct file *filp, char __user *ubuf,
5796 		       size_t cnt, loff_t *ppos)
5797 {
5798 	return simple_read_from_buffer(ubuf, cnt, ppos,
5799 					readme_msg, strlen(readme_msg));
5800 }
5801 
/* File operations for the read-only "README" tracefs file. */
static const struct file_operations tracing_readme_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_readme_read,
	.llseek		= generic_file_llseek,
};
5807 
/* seq_file ->next for "saved_tgids": advance to the next PID slot. */
static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
{
	++(*pos);

	return trace_find_tgid_ptr(*pos);
}
5814 
/* seq_file ->start for "saved_tgids": look up the slot at position *pos. */
static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
{
	return trace_find_tgid_ptr(*pos);
}
5821 
/* seq_file ->stop for "saved_tgids": no iteration state to release. */
static void saved_tgids_stop(struct seq_file *m, void *v)
{
}
5825 
saved_tgids_show(struct seq_file * m,void * v)5826 static int saved_tgids_show(struct seq_file *m, void *v)
5827 {
5828 	int *entry = (int *)v;
5829 	int pid = entry - tgid_map;
5830 	int tgid = *entry;
5831 
5832 	if (tgid == 0)
5833 		return SEQ_SKIP;
5834 
5835 	seq_printf(m, "%d %d\n", pid, tgid);
5836 	return 0;
5837 }
5838 
/* seq_file iterator backing the "saved_tgids" tracefs file. */
static const struct seq_operations tracing_saved_tgids_seq_ops = {
	.start		= saved_tgids_start,
	.stop		= saved_tgids_stop,
	.next		= saved_tgids_next,
	.show		= saved_tgids_show,
};
5845 
tracing_saved_tgids_open(struct inode * inode,struct file * filp)5846 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5847 {
5848 	int ret;
5849 
5850 	ret = tracing_check_open_get_tr(NULL);
5851 	if (ret)
5852 		return ret;
5853 
5854 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5855 }
5856 
5857 
/* File operations for the "saved_tgids" tracefs file (seq_file based). */
static const struct file_operations tracing_saved_tgids_fops = {
	.open		= tracing_saved_tgids_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
5864 
/*
 * seq_file ->next for "saved_cmdlines": walk map_cmdline_to_pid[] to
 * the next slot that actually holds a recorded PID.  @pos is advanced
 * by one on every call.  Returns the next populated slot, or NULL when
 * the map is exhausted.
 */
static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
{
	unsigned int *ptr = v;

	/*
	 * Only step past the current slot when this is not the very
	 * first call of a fresh read: start() hands us slot 0 with
	 * *pos == 0 and an empty seq buffer, and that slot itself must
	 * be examined rather than skipped.
	 */
	if (*pos || m->count)
		ptr++;

	(*pos)++;

	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
	     ptr++) {
		/* Skip slots that never had a cmdline recorded. */
		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
			continue;

		return ptr;
	}

	return NULL;
}
5884 
/*
 * seq_file ->start for "saved_cmdlines": take the cmdline lock (held
 * until ->stop, even when NULL is returned) and advance to the slot
 * for the requested position.
 */
static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
{
	void *entry;
	loff_t l = 0;

	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	/* savedcmd may be swapped by a resize; read it under the lock. */
	entry = &savedcmd->map_cmdline_to_pid[0];
	do {
		entry = saved_cmdlines_next(m, entry, &l);
	} while (entry && l <= *pos);

	return entry;
}
5902 
/* seq_file ->stop: drop the cmdline lock taken in saved_cmdlines_start(). */
static void saved_cmdlines_stop(struct seq_file *m, void *v)
{
	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}
5908 
saved_cmdlines_show(struct seq_file * m,void * v)5909 static int saved_cmdlines_show(struct seq_file *m, void *v)
5910 {
5911 	char buf[TASK_COMM_LEN];
5912 	unsigned int *pid = v;
5913 
5914 	__trace_find_cmdline(*pid, buf);
5915 	seq_printf(m, "%d %s\n", *pid, buf);
5916 	return 0;
5917 }
5918 
/* seq_file iterator backing the "saved_cmdlines" tracefs file. */
static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
	.start		= saved_cmdlines_start,
	.next		= saved_cmdlines_next,
	.stop		= saved_cmdlines_stop,
	.show		= saved_cmdlines_show,
};
5925 
tracing_saved_cmdlines_open(struct inode * inode,struct file * filp)5926 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5927 {
5928 	int ret;
5929 
5930 	ret = tracing_check_open_get_tr(NULL);
5931 	if (ret)
5932 		return ret;
5933 
5934 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5935 }
5936 
/* File operations for the "saved_cmdlines" tracefs file (seq_file based). */
static const struct file_operations tracing_saved_cmdlines_fops = {
	.open		= tracing_saved_cmdlines_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
5943 
/*
 * Read handler for "saved_cmdlines_size": report the current capacity
 * (number of entries) of the saved-cmdlines buffer.
 */
static ssize_t
tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
				 size_t cnt, loff_t *ppos)
{
	char buf[64];
	int r;

	/* Snapshot cmdline_num under the lock; savedcmd can be swapped. */
	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);
	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
5959 
/*
 * Replace the global saved-cmdlines buffer with a new one sized for
 * @val entries.  The swap happens under trace_cmdline_lock so readers
 * never see a half-torn-down buffer; the old buffer is freed only
 * after the lock is dropped.  Returns 0 on success or -ENOMEM.
 */
static int tracing_resize_saved_cmdlines(unsigned int val)
{
	struct saved_cmdlines_buffer *s, *savedcmd_temp;

	s = allocate_cmdlines_buffer(val);
	if (!s)
		return -ENOMEM;

	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);
	savedcmd_temp = savedcmd;
	savedcmd = s;
	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
	/* Free the old buffer outside the lock. */
	free_saved_cmdlines_buffer(savedcmd_temp);

	return 0;
}
5978 
5979 static ssize_t
tracing_saved_cmdlines_size_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5980 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5981 				  size_t cnt, loff_t *ppos)
5982 {
5983 	unsigned long val;
5984 	int ret;
5985 
5986 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5987 	if (ret)
5988 		return ret;
5989 
5990 	/* must have at least 1 entry or less than PID_MAX_DEFAULT */
5991 	if (!val || val > PID_MAX_DEFAULT)
5992 		return -EINVAL;
5993 
5994 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5995 	if (ret < 0)
5996 		return ret;
5997 
5998 	*ppos += cnt;
5999 
6000 	return cnt;
6001 }
6002 
/* File operations for the read/write "saved_cmdlines_size" tracefs file. */
static const struct file_operations tracing_saved_cmdlines_size_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_saved_cmdlines_size_read,
	.write		= tracing_saved_cmdlines_size_write,
};
6008 
6009 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6010 static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)6011 update_eval_map(union trace_eval_map_item *ptr)
6012 {
6013 	if (!ptr->map.eval_string) {
6014 		if (ptr->tail.next) {
6015 			ptr = ptr->tail.next;
6016 			/* Set ptr to the next real item (skip head) */
6017 			ptr++;
6018 		} else
6019 			return NULL;
6020 	}
6021 	return ptr;
6022 }
6023 
/*
 * seq_file ->next for "eval_map": step to the following map entry,
 * hopping across chained arrays via update_eval_map().
 */
static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
{
	union trace_eval_map_item *ptr = v;

	/*
	 * Paranoid! If ptr points to end, we don't want to increment past it.
	 * This really should never happen.
	 */
	(*pos)++;
	ptr = update_eval_map(ptr);
	if (WARN_ON_ONCE(!ptr))
		return NULL;

	ptr++;
	ptr = update_eval_map(ptr);

	return ptr;
}
6042 
/*
 * seq_file ->start for "eval_map": take the eval-map mutex (released
 * in ->stop) and walk forward to the entry at position *pos.  The
 * first element of trace_eval_maps is a head marker, so it is skipped
 * up front.
 */
static void *eval_map_start(struct seq_file *m, loff_t *pos)
{
	union trace_eval_map_item *item;
	loff_t l;

	mutex_lock(&trace_eval_mutex);

	item = trace_eval_maps;
	if (item)
		item++;	/* step past the head marker */

	for (l = 0; item && l < *pos; )
		item = eval_map_next(m, item, &l);

	return item;
}
6060 
/* seq_file ->stop: release the mutex taken in eval_map_start(). */
static void eval_map_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&trace_eval_mutex);
}
6065 
eval_map_show(struct seq_file * m,void * v)6066 static int eval_map_show(struct seq_file *m, void *v)
6067 {
6068 	union trace_eval_map_item *ptr = v;
6069 
6070 	seq_printf(m, "%s %ld (%s)\n",
6071 		   ptr->map.eval_string, ptr->map.eval_value,
6072 		   ptr->map.system);
6073 
6074 	return 0;
6075 }
6076 
/* seq_file iterator backing the "eval_map" tracefs file. */
static const struct seq_operations tracing_eval_map_seq_ops = {
	.start		= eval_map_start,
	.next		= eval_map_next,
	.stop		= eval_map_stop,
	.show		= eval_map_show,
};
6083 
tracing_eval_map_open(struct inode * inode,struct file * filp)6084 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6085 {
6086 	int ret;
6087 
6088 	ret = tracing_check_open_get_tr(NULL);
6089 	if (ret)
6090 		return ret;
6091 
6092 	return seq_open(filp, &tracing_eval_map_seq_ops);
6093 }
6094 
/* File operations for the read-only "eval_map" tracefs file. */
static const struct file_operations tracing_eval_map_fops = {
	.open		= tracing_eval_map_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
6101 
/*
 * Given the head item of a map array, return its tail item.  Each
 * array is laid out as [head][length map entries][tail].
 */
static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
{
	/* Return tail of array given the head */
	return ptr + ptr->head.length + 1;
}
6108 
/*
 * Append the @len eval maps at @start (from module @mod, or NULL for
 * built-in maps) to the trace_eval_maps chain backing the "eval_map"
 * file.  On allocation failure the maps are simply not exposed and a
 * warning is logged.
 */
static void
trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
			   int len)
{
	struct trace_eval_map **stop;
	struct trace_eval_map **map;
	union trace_eval_map_item *map_array;
	union trace_eval_map_item *ptr;

	stop = start + len;

	/*
	 * The trace_eval_maps contains the map plus a head and tail item,
	 * where the head holds the module and length of array, and the
	 * tail holds a pointer to the next list.
	 */
	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
	if (!map_array) {
		pr_warn("Unable to allocate trace eval mapping\n");
		return;
	}

	mutex_lock(&trace_eval_mutex);

	if (!trace_eval_maps)
		trace_eval_maps = map_array;
	else {
		/* Walk to the last array in the chain and link the new one. */
		ptr = trace_eval_maps;
		for (;;) {
			ptr = trace_eval_jmp_to_tail(ptr);
			if (!ptr->tail.next)
				break;
			ptr = ptr->tail.next;

		}
		ptr->tail.next = map_array;
	}
	/* Fill in the head item, then copy the maps themselves. */
	map_array->head.mod = mod;
	map_array->head.length = len;
	map_array++;

	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
		map_array->map = **map;
		map_array++;
	}
	/* A zeroed tail item terminates the array (tail.next == NULL). */
	memset(map_array, 0, sizeof(*map_array));

	mutex_unlock(&trace_eval_mutex);
}
6158 
/* Create the read-only "eval_map" file under @d_tracer. */
static void trace_create_eval_file(struct dentry *d_tracer)
{
	trace_create_file("eval_map", 0444, d_tracer,
			  NULL, &tracing_eval_map_fops);
}
6164 
6165 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* Stubs: without CONFIG_TRACE_EVAL_MAP_FILE no eval_map file is created. */
static inline void trace_create_eval_file(struct dentry *d_tracer) { }
static inline void trace_insert_eval_map_file(struct module *mod,
			      struct trace_eval_map **start, int len) { }
6169 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6170 
/*
 * Register @len eval (enum/sizeof) maps starting at @start: update the
 * event subsystem's copies, then expose them via the "eval_map" file
 * (a no-op unless CONFIG_TRACE_EVAL_MAP_FILE is enabled).
 */
static void trace_insert_eval_map(struct module *mod,
				  struct trace_eval_map **start, int len)
{
	if (len <= 0)
		return;

	trace_event_eval_update(start, len);

	trace_insert_eval_map_file(mod, start, len);
}
6185 
6186 static ssize_t
tracing_set_trace_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6187 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6188 		       size_t cnt, loff_t *ppos)
6189 {
6190 	struct trace_array *tr = filp->private_data;
6191 	char buf[MAX_TRACER_SIZE+2];
6192 	int r;
6193 
6194 	mutex_lock(&trace_types_lock);
6195 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6196 	mutex_unlock(&trace_types_lock);
6197 
6198 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6199 }
6200 
/*
 * Initialize tracer @t on trace array @tr: wipe the per-CPU ring
 * buffers first so the tracer starts from a clean trace, then run the
 * tracer's ->init() callback and return its result.
 */
int tracer_init(struct tracer *t, struct trace_array *tr)
{
	tracing_reset_online_cpus(&tr->array_buffer);
	return t->init(tr);
}
6206 
/* Record @val as the cached entry count for every tracing CPU of @buf. */
static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
{
	int cpu;

	for_each_tracing_cpu(cpu)
		per_cpu_ptr(buf->data, cpu)->entries = val;
}
6214 
update_buffer_entries(struct array_buffer * buf,int cpu)6215 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6216 {
6217 	if (cpu == RING_BUFFER_ALL_CPUS) {
6218 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6219 	} else {
6220 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6221 	}
6222 }
6223 
6224 #ifdef CONFIG_TRACER_MAX_TRACE
/*
 * Resize @trace_buf to match the per-CPU entry counts recorded in
 * @size_buf, for @cpu_id or (with RING_BUFFER_ALL_CPUS) all CPUs.  On
 * partial failure, CPUs already resized keep their new size.  Returns
 * 0 on success or the error from ring_buffer_resize().
 */
static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
					struct array_buffer *size_buf, int cpu_id)
{
	int cpu, ret = 0;

	if (cpu_id == RING_BUFFER_ALL_CPUS) {
		/* Mirror every CPU's entry count from @size_buf. */
		for_each_tracing_cpu(cpu) {
			ret = ring_buffer_resize(trace_buf->buffer,
				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
			if (ret < 0)
				break;
			per_cpu_ptr(trace_buf->data, cpu)->entries =
				per_cpu_ptr(size_buf->data, cpu)->entries;
		}
	} else {
		ret = ring_buffer_resize(trace_buf->buffer,
				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
		if (ret == 0)
			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
				per_cpu_ptr(size_buf->data, cpu_id)->entries;
	}

	return ret;
}
6250 #endif /* CONFIG_TRACER_MAX_TRACE */
6251 
/*
 * Resize @tr's main ring buffer -- and, when a snapshot is allocated,
 * the max buffer too -- to @size for @cpu (or RING_BUFFER_ALL_CPUS).
 * Tracing is stopped for the duration of the resize.  Returns 0 on
 * success or a negative errno.  Callers hold trace_types_lock.
 */
static int __tracing_resize_ring_buffer(struct trace_array *tr,
					unsigned long size, int cpu)
{
	int ret;

	/*
	 * If kernel or user changes the size of the ring buffer
	 * we use the size that was given, and we can forget about
	 * expanding it later.
	 */
	ring_buffer_expanded = true;

	/* May be called before buffers are initialized */
	if (!tr->array_buffer.buffer)
		return 0;

	/* Do not allow tracing while resizing ring buffer */
	tracing_stop_tr(tr);

	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
	if (ret < 0)
		goto out_start;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (!tr->allocated_snapshot)
		goto out;

	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
	if (ret < 0) {
		/* Try to revert the main buffer to its recorded size. */
		int r = resize_buffer_duplicate_size(&tr->array_buffer,
						     &tr->array_buffer, cpu);
		if (r < 0) {
			/*
			 * AARGH! We are left with different
			 * size max buffer!!!!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer
			 * with the saved snap shot. We succeeded to
			 * update the size of the main buffer, but failed to
			 * update the size of the max buffer. But when we tried
			 * to reset the main buffer to the original size, we
			 * failed there too. This is very unlikely to
			 * happen, but if it does, warn and kill all
			 * tracing.
			 */
			WARN_ON(1);
			tracing_disabled = 1;
		}
		goto out_start;
	}

	update_buffer_entries(&tr->max_buffer, cpu);

 out:
#endif /* CONFIG_TRACER_MAX_TRACE */

	update_buffer_entries(&tr->array_buffer, cpu);
 out_start:
	tracing_start_tr(tr);
	return ret;
}
6314 
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)6315 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6316 				  unsigned long size, int cpu_id)
6317 {
6318 	int ret;
6319 
6320 	mutex_lock(&trace_types_lock);
6321 
6322 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6323 		/* make sure, this cpu is enabled in the mask */
6324 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6325 			ret = -EINVAL;
6326 			goto out;
6327 		}
6328 	}
6329 
6330 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6331 	if (ret < 0)
6332 		ret = -ENOMEM;
6333 
6334 out:
6335 	mutex_unlock(&trace_types_lock);
6336 
6337 	return ret;
6338 }
6339 
6340 
/**
 * tracing_update_buffers - used by tracing facility to expand ring buffers
 *
 * To save on memory when the tracing is never used on a system with it
 * configured in. The ring buffers are set to a minimum size. But once
 * a user starts to use the tracing facility, then they need to grow
 * to their default size.
 *
 * This function is to be called when a tracer is about to be used.
 *
 * Return: 0 on success (or if the buffers were already expanded),
 * otherwise the error from the resize attempt.
 */
int tracing_update_buffers(void)
{
	int ret = 0;

	mutex_lock(&trace_types_lock);
	/* Only expand once; explicit resizes set ring_buffer_expanded too. */
	if (!ring_buffer_expanded)
		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
	mutex_unlock(&trace_types_lock);

	return ret;
}
6363 
6364 struct trace_option_dentry;
6365 
6366 static void
6367 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6368 
6369 /*
6370  * Used to clear out the tracer before deletion of an instance.
6371  * Must have trace_types_lock held.
6372  */
tracing_set_nop(struct trace_array * tr)6373 static void tracing_set_nop(struct trace_array *tr)
6374 {
6375 	if (tr->current_trace == &nop_trace)
6376 		return;
6377 
6378 	tr->current_trace->enabled--;
6379 
6380 	if (tr->current_trace->reset)
6381 		tr->current_trace->reset(tr);
6382 
6383 	tr->current_trace = &nop_trace;
6384 }
6385 
6386 static bool tracer_options_updated;
6387 
add_tracer_options(struct trace_array * tr,struct tracer * t)6388 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6389 {
6390 	/* Only enable if the directory has been created already. */
6391 	if (!tr->dir)
6392 		return;
6393 
6394 	/* Only create trace option files after update_tracer_options finish */
6395 	if (!tracer_options_updated)
6396 		return;
6397 
6398 	create_trace_option_files(tr, t);
6399 }
6400 
/*
 * tracing_set_tracer - switch @tr's current tracer to the one named @buf
 *
 * Looks the tracer up in the registered trace_types list, tears down
 * the current tracer and initializes the new one.  The ring buffer is
 * expanded to its default size first if that has not happened yet.
 * Returns 0 on success or a negative errno.
 */
int tracing_set_tracer(struct trace_array *tr, const char *buf)
{
	struct tracer *t;
#ifdef CONFIG_TRACER_MAX_TRACE
	bool had_max_tr;
#endif
	int ret = 0;

	mutex_lock(&trace_types_lock);

	if (!ring_buffer_expanded) {
		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			goto out;
		ret = 0;
	}

	/* Look the requested tracer up by name. */
	for (t = trace_types; t; t = t->next) {
		if (strcmp(t->name, buf) == 0)
			break;
	}
	if (!t) {
		ret = -EINVAL;
		goto out;
	}
	if (t == tr->current_trace)
		goto out;

#ifdef CONFIG_TRACER_SNAPSHOT
	/* A pending conditional snapshot blocks tracers that use the max buffer. */
	if (t->use_max_tr) {
		local_irq_disable();
		arch_spin_lock(&tr->max_lock);
		if (tr->cond_snapshot)
			ret = -EBUSY;
		arch_spin_unlock(&tr->max_lock);
		local_irq_enable();
		if (ret)
			goto out;
	}
#endif
	/* Some tracers won't work on kernel command line */
	if (system_state < SYSTEM_RUNNING && t->noboot) {
		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
			t->name);
		goto out;
	}

	/* Some tracers are only allowed for the top level buffer */
	if (!trace_ok_for_array(t, tr)) {
		ret = -EINVAL;
		goto out;
	}

	/* If trace pipe files are being read, we can't change the tracer */
	if (tr->trace_ref) {
		ret = -EBUSY;
		goto out;
	}

	trace_branch_disable();

	/* Tear down the old tracer. */
	tr->current_trace->enabled--;

	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

#ifdef CONFIG_TRACER_MAX_TRACE
	had_max_tr = tr->current_trace->use_max_tr;

	/* Current trace needs to be nop_trace before synchronize_rcu */
	tr->current_trace = &nop_trace;

	if (had_max_tr && !t->use_max_tr) {
		/*
		 * We need to make sure that the update_max_tr sees that
		 * current_trace changed to nop_trace to keep it from
		 * swapping the buffers after we resize it.
		 * The update_max_tr is called from interrupts disabled
		 * so a synchronized_sched() is sufficient.
		 */
		synchronize_rcu();
		free_snapshot(tr);
	}

	if (t->use_max_tr && !tr->allocated_snapshot) {
		ret = tracing_alloc_snapshot_instance(tr);
		if (ret < 0)
			goto out;
	}
#else
	tr->current_trace = &nop_trace;
#endif

	if (t->init) {
		ret = tracer_init(t, tr);
		if (ret)
			goto out;
	}

	tr->current_trace = t;
	tr->current_trace->enabled++;
	trace_branch_enable(tr);
 out:
	mutex_unlock(&trace_types_lock);

	return ret;
}
6509 
6510 static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6511 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6512 			size_t cnt, loff_t *ppos)
6513 {
6514 	struct trace_array *tr = filp->private_data;
6515 	char buf[MAX_TRACER_SIZE+1];
6516 	int i;
6517 	size_t ret;
6518 	int err;
6519 
6520 	ret = cnt;
6521 
6522 	if (cnt > MAX_TRACER_SIZE)
6523 		cnt = MAX_TRACER_SIZE;
6524 
6525 	if (copy_from_user(buf, ubuf, cnt))
6526 		return -EFAULT;
6527 
6528 	buf[cnt] = 0;
6529 
6530 	/* strip ending whitespace. */
6531 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6532 		buf[i] = 0;
6533 
6534 	err = tracing_set_tracer(tr, buf);
6535 	if (err)
6536 		return err;
6537 
6538 	*ppos += ret;
6539 
6540 	return ret;
6541 }
6542 
6543 static ssize_t
tracing_nsecs_read(unsigned long * ptr,char __user * ubuf,size_t cnt,loff_t * ppos)6544 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6545 		   size_t cnt, loff_t *ppos)
6546 {
6547 	char buf[64];
6548 	int r;
6549 
6550 	r = snprintf(buf, sizeof(buf), "%ld\n",
6551 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6552 	if (r > sizeof(buf))
6553 		r = sizeof(buf);
6554 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6555 }
6556 
6557 static ssize_t
tracing_nsecs_write(unsigned long * ptr,const char __user * ubuf,size_t cnt,loff_t * ppos)6558 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6559 		    size_t cnt, loff_t *ppos)
6560 {
6561 	unsigned long val;
6562 	int ret;
6563 
6564 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6565 	if (ret)
6566 		return ret;
6567 
6568 	*ptr = val * 1000;
6569 
6570 	return cnt;
6571 }
6572 
/* Read handler for "tracing_thresh": report the threshold in usecs. */
static ssize_t
tracing_thresh_read(struct file *filp, char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
}
6579 
/*
 * Write handler for "tracing_thresh": update the global threshold
 * (given in usecs, stored in nsecs) and let the current tracer react
 * via its ->update_thresh() callback, if it has one.
 */
static ssize_t
tracing_thresh_write(struct file *filp, const char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	int ret;

	mutex_lock(&trace_types_lock);
	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
	if (ret < 0)
		goto out;

	if (tr->current_trace->update_thresh) {
		ret = tr->current_trace->update_thresh(tr);
		if (ret < 0)
			goto out;
	}

	ret = cnt;
out:
	mutex_unlock(&trace_types_lock);

	return ret;
}
6604 
6605 #ifdef CONFIG_TRACER_MAX_TRACE
6606 
/* Read handler for the max-latency file: report tr->max_latency in usecs. */
static ssize_t
tracing_max_lat_read(struct file *filp, char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;

	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
}
6615 
/* Write handler for the max-latency file: set tr->max_latency from usecs. */
static ssize_t
tracing_max_lat_write(struct file *filp, const char __user *ubuf,
		      size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;

	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
}
6624 
6625 #endif
6626 
/*
 * Open handler for "trace_pipe": allocate and set up the consuming
 * trace_iterator stored in filp->private_data, and take a reference on
 * @tr for the lifetime of the file.  Returns 0 or a negative errno.
 */
static int tracing_open_pipe(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	mutex_lock(&trace_types_lock);

	/* create a buffer to store the information to pass to userspace */
	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter) {
		ret = -ENOMEM;
		__trace_array_put(tr);
		goto out;
	}

	trace_seq_init(&iter->seq);
	iter->trace = tr->current_trace;

	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
		ret = -ENOMEM;
		goto fail;
	}

	/* trace pipe does not show start of buffer */
	cpumask_setall(iter->started);

	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
		iter->iter_flags |= TRACE_FILE_LAT_FMT;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	iter->tr = tr;
	iter->array_buffer = &tr->array_buffer;
	iter->cpu_file = tracing_get_cpu(inode);
	mutex_init(&iter->mutex);
	filp->private_data = iter;

	if (iter->trace->pipe_open)
		iter->trace->pipe_open(iter);

	nonseekable_open(inode, filp);

	/* An open pipe reader blocks tracer switches (see tracing_set_tracer). */
	tr->trace_ref++;
out:
	mutex_unlock(&trace_types_lock);
	return ret;

fail:
	kfree(iter);
	__trace_array_put(tr);
	mutex_unlock(&trace_types_lock);
	return ret;
}
6687 
/*
 * Release handler for "trace_pipe": undo tracing_open_pipe() -- drop
 * the pipe-reader count, notify the tracer, free the iterator and
 * release the trace array reference.
 */
static int tracing_release_pipe(struct inode *inode, struct file *file)
{
	struct trace_iterator *iter = file->private_data;
	struct trace_array *tr = inode->i_private;

	mutex_lock(&trace_types_lock);

	tr->trace_ref--;

	if (iter->trace->pipe_close)
		iter->trace->pipe_close(iter);

	mutex_unlock(&trace_types_lock);

	free_cpumask_var(iter->started);
	kfree(iter->fmt);
	kfree(iter->temp);
	mutex_destroy(&iter->mutex);
	kfree(iter);

	trace_array_put(tr);

	return 0;
}
6712 
6713 static __poll_t
trace_poll(struct trace_iterator * iter,struct file * filp,poll_table * poll_table)6714 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6715 {
6716 	struct trace_array *tr = iter->tr;
6717 
6718 	/* Iterators are static, they should be filled or empty */
6719 	if (trace_buffer_iter(iter, iter->cpu_file))
6720 		return EPOLLIN | EPOLLRDNORM;
6721 
6722 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6723 		/*
6724 		 * Always select as readable when in blocking mode
6725 		 */
6726 		return EPOLLIN | EPOLLRDNORM;
6727 	else
6728 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6729 					     filp, poll_table, iter->tr->buffer_percent);
6730 }
6731 
/* ->poll for "trace_pipe": delegate to the common trace_poll() helper. */
static __poll_t
tracing_poll_pipe(struct file *filp, poll_table *poll_table)
{
	struct trace_iterator *iter = filp->private_data;

	return trace_poll(iter, filp, poll_table);
}
6739 
/*
 * Wait until the trace pipe has data to read.  Returns 1 when the pipe
 * is (probably) non-empty, -EAGAIN for non-blocking readers, or the
 * error from wait_on_pipe().  Must be called with iter->mutex held;
 * the mutex is dropped while sleeping.
 */
static int tracing_wait_pipe(struct file *filp)
{
	struct trace_iterator *iter = filp->private_data;
	int ret;

	while (trace_empty(iter)) {

		/* Non-blocking readers never sleep. */
		if ((filp->f_flags & O_NONBLOCK)) {
			return -EAGAIN;
		}

		/*
		 * We block until we read something and tracing is disabled.
		 * We still block if tracing is disabled, but we have never
		 * read anything. This allows a user to cat this file, and
		 * then enable tracing. But after we have read something,
		 * we give an EOF when tracing is again disabled.
		 *
		 * iter->pos will be 0 if we haven't read anything.
		 */
		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
			break;

		/* Drop iter->mutex while sleeping. */
		mutex_unlock(&iter->mutex);

		ret = wait_on_pipe(iter, 0);

		mutex_lock(&iter->mutex);

		if (ret)
			return ret;
	}

	return 1;
}
6776 
/*
 * Consumer reader.
 *
 * read() handler for trace_pipe: format and *consume* events from the
 * ring buffer into iter->seq, then copy them to user space.  Leftover
 * formatted data from a previous read is flushed first.
 */
static ssize_t
tracing_read_pipe(struct file *filp, char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	struct trace_iterator *iter = filp->private_data;
	ssize_t sret;

	/*
	 * Avoid more than one consumer on a single file descriptor
	 * This is just a matter of traces coherency, the ring buffer itself
	 * is protected.
	 */
	mutex_lock(&iter->mutex);

	/* return any leftover data */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (sret != -EBUSY)
		goto out;

	trace_seq_init(&iter->seq);

	/* The current tracer may override the default read */
	if (iter->trace->read) {
		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
		if (sret)
			goto out;
	}

waitagain:
	sret = tracing_wait_pipe(filp);
	if (sret <= 0)
		goto out;

	/* stop when tracing is finished */
	if (trace_empty(iter)) {
		sret = 0;
		goto out;
	}

	/* iter->seq is backed by a single page; cap the copy accordingly */
	if (cnt >= PAGE_SIZE)
		cnt = PAGE_SIZE - 1;

	/* reset all but tr, trace, and overruns */
	memset(&iter->seq, 0,
	       sizeof(struct trace_iterator) -
	       offsetof(struct trace_iterator, seq));
	cpumask_clear(iter->started);
	trace_seq_init(&iter->seq);
	iter->pos = -1;

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);
	while (trace_find_next_entry_inc(iter) != NULL) {
		enum print_line_t ret;
		int save_len = iter->seq.seq.len;

		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/*
			 * If one print_trace_line() fills entire trace_seq in one shot,
			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
			 * In this case, we need to consume it, otherwise, loop will peek
			 * this event next time, resulting in an infinite loop.
			 */
			if (save_len == 0) {
				iter->seq.full = 0;
				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
				trace_consume(iter);
				break;
			}

			/* In other cases, don't print partial lines */
			iter->seq.seq.len = save_len;
			break;
		}
		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);

		if (trace_seq_used(&iter->seq) >= cnt)
			break;

		/*
		 * Setting the full flag means we reached the trace_seq buffer
		 * size and we should leave by partial output condition above.
		 * One of the trace_seq_* functions is not used properly.
		 */
		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
			  iter->ent->type);
	}
	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();

	/* Now copy what we have to the user */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
		trace_seq_init(&iter->seq);

	/*
	 * If there was nothing to send to user, in spite of consuming trace
	 * entries, go back to wait for more entries.
	 */
	if (sret == -EBUSY)
		goto waitagain;

out:
	mutex_unlock(&iter->mutex);

	return sret;
}
6888 
tracing_spd_release_pipe(struct splice_pipe_desc * spd,unsigned int idx)6889 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6890 				     unsigned int idx)
6891 {
6892 	__free_page(spd->pages[idx]);
6893 }
6894 
/*
 * Format as many entries as fit into the (page-sized) iter->seq,
 * consuming each one as it is formatted.  @rem is the number of bytes
 * the splice caller still wants; the updated remainder is returned,
 * with 0 meaning "stop filling pages".
 */
static size_t
tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
{
	size_t count;
	int save_len;
	int ret;

	/* Seq buffer is page-sized, exactly what we need. */
	for (;;) {
		save_len = iter->seq.seq.len;
		ret = print_trace_line(iter);

		if (trace_seq_has_overflowed(&iter->seq)) {
			/* Roll back the partially printed line */
			iter->seq.seq.len = save_len;
			break;
		}

		/*
		 * This should not be hit, because it should only
		 * be set if the iter->seq overflowed. But check it
		 * anyway to be safe.
		 */
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			iter->seq.seq.len = save_len;
			break;
		}

		/* Bytes this line added; stop if the caller wants fewer */
		count = trace_seq_used(&iter->seq) - save_len;
		if (rem < count) {
			rem = 0;
			iter->seq.seq.len = save_len;
			break;
		}

		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);
		rem -= count;
		if (!trace_find_next_entry_inc(iter))	{
			rem = 0;
			iter->ent = NULL;
			break;
		}
	}

	return rem;
}
6941 
tracing_splice_read_pipe(struct file * filp,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)6942 static ssize_t tracing_splice_read_pipe(struct file *filp,
6943 					loff_t *ppos,
6944 					struct pipe_inode_info *pipe,
6945 					size_t len,
6946 					unsigned int flags)
6947 {
6948 	struct page *pages_def[PIPE_DEF_BUFFERS];
6949 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6950 	struct trace_iterator *iter = filp->private_data;
6951 	struct splice_pipe_desc spd = {
6952 		.pages		= pages_def,
6953 		.partial	= partial_def,
6954 		.nr_pages	= 0, /* This gets updated below. */
6955 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6956 		.ops		= &default_pipe_buf_ops,
6957 		.spd_release	= tracing_spd_release_pipe,
6958 	};
6959 	ssize_t ret;
6960 	size_t rem;
6961 	unsigned int i;
6962 
6963 	if (splice_grow_spd(pipe, &spd))
6964 		return -ENOMEM;
6965 
6966 	mutex_lock(&iter->mutex);
6967 
6968 	if (iter->trace->splice_read) {
6969 		ret = iter->trace->splice_read(iter, filp,
6970 					       ppos, pipe, len, flags);
6971 		if (ret)
6972 			goto out_err;
6973 	}
6974 
6975 	ret = tracing_wait_pipe(filp);
6976 	if (ret <= 0)
6977 		goto out_err;
6978 
6979 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6980 		ret = -EFAULT;
6981 		goto out_err;
6982 	}
6983 
6984 	trace_event_read_lock();
6985 	trace_access_lock(iter->cpu_file);
6986 
6987 	/* Fill as many pages as possible. */
6988 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6989 		spd.pages[i] = alloc_page(GFP_KERNEL);
6990 		if (!spd.pages[i])
6991 			break;
6992 
6993 		rem = tracing_fill_pipe_page(rem, iter);
6994 
6995 		/* Copy the data into the page, so we can start over. */
6996 		ret = trace_seq_to_buffer(&iter->seq,
6997 					  page_address(spd.pages[i]),
6998 					  trace_seq_used(&iter->seq));
6999 		if (ret < 0) {
7000 			__free_page(spd.pages[i]);
7001 			break;
7002 		}
7003 		spd.partial[i].offset = 0;
7004 		spd.partial[i].len = trace_seq_used(&iter->seq);
7005 
7006 		trace_seq_init(&iter->seq);
7007 	}
7008 
7009 	trace_access_unlock(iter->cpu_file);
7010 	trace_event_read_unlock();
7011 	mutex_unlock(&iter->mutex);
7012 
7013 	spd.nr_pages = i;
7014 
7015 	if (i)
7016 		ret = splice_to_pipe(pipe, &spd);
7017 	else
7018 		ret = 0;
7019 out:
7020 	splice_shrink_spd(&spd);
7021 	return ret;
7022 
7023 out_err:
7024 	mutex_unlock(&iter->mutex);
7025 	goto out;
7026 }
7027 
7028 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7029 tracing_entries_read(struct file *filp, char __user *ubuf,
7030 		     size_t cnt, loff_t *ppos)
7031 {
7032 	struct inode *inode = file_inode(filp);
7033 	struct trace_array *tr = inode->i_private;
7034 	int cpu = tracing_get_cpu(inode);
7035 	char buf[64];
7036 	int r = 0;
7037 	ssize_t ret;
7038 
7039 	mutex_lock(&trace_types_lock);
7040 
7041 	if (cpu == RING_BUFFER_ALL_CPUS) {
7042 		int cpu, buf_size_same;
7043 		unsigned long size;
7044 
7045 		size = 0;
7046 		buf_size_same = 1;
7047 		/* check if all cpu sizes are same */
7048 		for_each_tracing_cpu(cpu) {
7049 			/* fill in the size from first enabled cpu */
7050 			if (size == 0)
7051 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7052 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7053 				buf_size_same = 0;
7054 				break;
7055 			}
7056 		}
7057 
7058 		if (buf_size_same) {
7059 			if (!ring_buffer_expanded)
7060 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7061 					    size >> 10,
7062 					    trace_buf_size >> 10);
7063 			else
7064 				r = sprintf(buf, "%lu\n", size >> 10);
7065 		} else
7066 			r = sprintf(buf, "X\n");
7067 	} else
7068 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7069 
7070 	mutex_unlock(&trace_types_lock);
7071 
7072 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7073 	return ret;
7074 }
7075 
/*
 * write() handler for buffer_size_kb: resize the ring buffer (one CPU
 * or all CPUs, depending on which file was opened).  The value is in KB
 * and must be at least 1.
 */
static ssize_t
tracing_entries_write(struct file *filp, const char __user *ubuf,
		      size_t cnt, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct trace_array *tr = inode->i_private;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	/* must have at least 1 entry */
	if (!val)
		return -EINVAL;

	/* value is in KB */
	val <<= 10;
	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}
7103 
/*
 * read() handler for buffer_total_size_kb: sum of all per-cpu ring
 * buffer sizes in KB, annotated with the would-be expanded total while
 * the boot-time buffer has not been expanded yet.
 */
static ssize_t
tracing_total_entries_read(struct file *filp, char __user *ubuf,
				size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	char buf[64];
	int r, cpu;
	unsigned long size = 0, expanded_size = 0;

	mutex_lock(&trace_types_lock);
	for_each_tracing_cpu(cpu) {
		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
		if (!ring_buffer_expanded)
			expanded_size += trace_buf_size >> 10;
	}
	if (ring_buffer_expanded)
		r = sprintf(buf, "%lu\n", size);
	else
		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
	mutex_unlock(&trace_types_lock);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
7127 
7128 static ssize_t
tracing_free_buffer_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7129 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7130 			  size_t cnt, loff_t *ppos)
7131 {
7132 	/*
7133 	 * There is no need to read what the user has written, this function
7134 	 * is just to make sure that there is no error when "echo" is used
7135 	 */
7136 
7137 	*ppos += cnt;
7138 
7139 	return cnt;
7140 }
7141 
/*
 * release() handler for free_buffer: optionally stop tracing (when the
 * TRACE_ITER_STOP_ON_FREE option is set) and shrink the ring buffer to
 * its minimum size, then drop the reference taken at open.
 */
static int
tracing_free_buffer_release(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;

	/* disable tracing ? */
	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
		tracer_tracing_off(tr);
	/* resize the ring buffer to 0 */
	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);

	trace_array_put(tr);

	return 0;
}
7157 
/*
 * write() handler for trace_marker: record the user-supplied string as
 * a TRACE_PRINT event in the ring buffer.  A '\n' is appended if
 * missing, and "<faulted>" is recorded instead if the user page faults
 * during the inatomic copy.  Returns the byte count written, or a
 * negative error.
 */
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
					size_t cnt, loff_t *fpos)
{
	struct trace_array *tr = filp->private_data;
	struct ring_buffer_event *event;
	enum event_trigger_type tt = ETT_NONE;
	struct trace_buffer *buffer;
	struct print_entry *entry;
	ssize_t written;
	int size;
	int len;

/* Used in tracing_mark_raw_write() as well */
#define FAULTED_STR "<faulted>"
#define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */

	if (tracing_disabled)
		return -EINVAL;

	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
		return -EINVAL;

	if (cnt > TRACE_BUF_SIZE)
		cnt = TRACE_BUF_SIZE;

	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);

	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */

	/* If less than "<faulted>", then make sure we can still add that */
	if (cnt < FAULTED_SIZE)
		size += FAULTED_SIZE - cnt;

	buffer = tr->array_buffer.buffer;
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
					    tracing_gen_ctx());
	if (unlikely(!event))
		/* Ring buffer disabled, return as if not open for write */
		return -EBADF;

	entry = ring_buffer_event_data(event);
	entry->ip = _THIS_IP_;

	/* Inatomic copy: on fault, substitute the "<faulted>" marker */
	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
	if (len) {
		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
		cnt = FAULTED_SIZE;
		written = -EFAULT;
	} else
		written = cnt;

	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
		/* do not add \n before testing triggers, but add \0 */
		entry->buf[cnt] = '\0';
		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
	}

	/*
	 * NOTE(review): this assumes cnt > 0; a zero-length write would
	 * index entry->buf[-1] — confirm callers never pass cnt == 0.
	 */
	if (entry->buf[cnt - 1] != '\n') {
		entry->buf[cnt] = '\n';
		entry->buf[cnt + 1] = '\0';
	} else
		entry->buf[cnt] = '\0';

	if (static_branch_unlikely(&trace_marker_exports_enabled))
		ftrace_exports(event, TRACE_EXPORT_MARKER);
	__buffer_unlock_commit(buffer, event);

	if (tt)
		event_triggers_post_call(tr->trace_marker_file, tt);

	return written;
}
7231 
/* Limit it for now to 3K (including tag) */
#define RAW_DATA_MAX_SIZE (1024*3)

/*
 * write() handler for trace_marker_raw: record raw binary data,
 * prefixed by a user-supplied int tag, as a TRACE_RAW_DATA event.
 * On a faulting copy the id is set to -1 and "<faulted>" is stored.
 */
static ssize_t
tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
					size_t cnt, loff_t *fpos)
{
	struct trace_array *tr = filp->private_data;
	struct ring_buffer_event *event;
	struct trace_buffer *buffer;
	struct raw_data_entry *entry;
	ssize_t written;
	int size;
	int len;

#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))

	if (tracing_disabled)
		return -EINVAL;

	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
		return -EINVAL;

	/* The marker must at least have a tag id */
	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
		return -EINVAL;

	if (cnt > TRACE_BUF_SIZE)
		cnt = TRACE_BUF_SIZE;

	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);

	size = sizeof(*entry) + cnt;
	/* Reserve room for the fault marker even for tiny writes */
	if (cnt < FAULT_SIZE_ID)
		size += FAULT_SIZE_ID - cnt;

	buffer = tr->array_buffer.buffer;
	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
					    tracing_gen_ctx());
	if (!event)
		/* Ring buffer disabled, return as if not open for write */
		return -EBADF;

	entry = ring_buffer_event_data(event);

	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
	if (len) {
		entry->id = -1;
		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
		written = -EFAULT;
	} else
		written = cnt;

	__buffer_unlock_commit(buffer, event);

	return written;
}
7289 
tracing_clock_show(struct seq_file * m,void * v)7290 static int tracing_clock_show(struct seq_file *m, void *v)
7291 {
7292 	struct trace_array *tr = m->private;
7293 	int i;
7294 
7295 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7296 		seq_printf(m,
7297 			"%s%s%s%s", i ? " " : "",
7298 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7299 			i == tr->clock_id ? "]" : "");
7300 	seq_putc(m, '\n');
7301 
7302 	return 0;
7303 }
7304 
tracing_set_clock(struct trace_array * tr,const char * clockstr)7305 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7306 {
7307 	int i;
7308 
7309 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7310 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7311 			break;
7312 	}
7313 	if (i == ARRAY_SIZE(trace_clocks))
7314 		return -EINVAL;
7315 
7316 	mutex_lock(&trace_types_lock);
7317 
7318 	tr->clock_id = i;
7319 
7320 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7321 
7322 	/*
7323 	 * New clock may not be consistent with the previous clock.
7324 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7325 	 */
7326 	tracing_reset_online_cpus(&tr->array_buffer);
7327 
7328 #ifdef CONFIG_TRACER_MAX_TRACE
7329 	if (tr->max_buffer.buffer)
7330 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7331 	tracing_reset_online_cpus(&tr->max_buffer);
7332 #endif
7333 
7334 	mutex_unlock(&trace_types_lock);
7335 
7336 	return 0;
7337 }
7338 
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7339 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7340 				   size_t cnt, loff_t *fpos)
7341 {
7342 	struct seq_file *m = filp->private_data;
7343 	struct trace_array *tr = m->private;
7344 	char buf[64];
7345 	const char *clockstr;
7346 	int ret;
7347 
7348 	if (cnt >= sizeof(buf))
7349 		return -EINVAL;
7350 
7351 	if (copy_from_user(buf, ubuf, cnt))
7352 		return -EFAULT;
7353 
7354 	buf[cnt] = 0;
7355 
7356 	clockstr = strstrip(buf);
7357 
7358 	ret = tracing_set_clock(tr, clockstr);
7359 	if (ret)
7360 		return ret;
7361 
7362 	*fpos += cnt;
7363 
7364 	return cnt;
7365 }
7366 
tracing_clock_open(struct inode * inode,struct file * file)7367 static int tracing_clock_open(struct inode *inode, struct file *file)
7368 {
7369 	struct trace_array *tr = inode->i_private;
7370 	int ret;
7371 
7372 	ret = tracing_check_open_get_tr(tr);
7373 	if (ret)
7374 		return ret;
7375 
7376 	ret = single_open(file, tracing_clock_show, inode->i_private);
7377 	if (ret < 0)
7378 		trace_array_put(tr);
7379 
7380 	return ret;
7381 }
7382 
tracing_time_stamp_mode_show(struct seq_file * m,void * v)7383 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7384 {
7385 	struct trace_array *tr = m->private;
7386 
7387 	mutex_lock(&trace_types_lock);
7388 
7389 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7390 		seq_puts(m, "delta [absolute]\n");
7391 	else
7392 		seq_puts(m, "[delta] absolute\n");
7393 
7394 	mutex_unlock(&trace_types_lock);
7395 
7396 	return 0;
7397 }
7398 
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)7399 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7400 {
7401 	struct trace_array *tr = inode->i_private;
7402 	int ret;
7403 
7404 	ret = tracing_check_open_get_tr(tr);
7405 	if (ret)
7406 		return ret;
7407 
7408 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7409 	if (ret < 0)
7410 		trace_array_put(tr);
7411 
7412 	return ret;
7413 }
7414 
tracing_event_time_stamp(struct trace_buffer * buffer,struct ring_buffer_event * rbe)7415 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7416 {
7417 	if (rbe == this_cpu_read(trace_buffered_event))
7418 		return ring_buffer_time_stamp(buffer);
7419 
7420 	return ring_buffer_event_time_stamp(buffer, rbe);
7421 }
7422 
/*
 * Set or disable using the per CPU trace_buffer_event when possible.
 *
 * Reference counted: each @set=true call takes a reference on
 * tr->no_filter_buffering_ref and each @set=false call drops one.
 * Returns 0, or -EINVAL when dropping a reference that was never taken.
 */
int tracing_set_filter_buffering(struct trace_array *tr, bool set)
{
	int ret = 0;

	mutex_lock(&trace_types_lock);

	if (set && tr->no_filter_buffering_ref++)
		goto out;

	if (!set) {
		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
			ret = -EINVAL;
			goto out;
		}

		--tr->no_filter_buffering_ref;
	}
 out:
	mutex_unlock(&trace_types_lock);

	return ret;
}
7448 
/* Per-open-file state for the raw per-cpu buffer files */
struct ftrace_buffer_info {
	struct trace_iterator	iter;
	void			*spare;		/* spare ring buffer page, NULL until first read */
	unsigned int		spare_cpu;	/* presumably the CPU the spare page belongs to — confirm at use sites */
	unsigned int		read;		/* presumably a read offset into the spare page — confirm at use sites */
};
7455 
7456 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_open(struct inode * inode,struct file * file)7457 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7458 {
7459 	struct trace_array *tr = inode->i_private;
7460 	struct trace_iterator *iter;
7461 	struct seq_file *m;
7462 	int ret;
7463 
7464 	ret = tracing_check_open_get_tr(tr);
7465 	if (ret)
7466 		return ret;
7467 
7468 	if (file->f_mode & FMODE_READ) {
7469 		iter = __tracing_open(inode, file, true);
7470 		if (IS_ERR(iter))
7471 			ret = PTR_ERR(iter);
7472 	} else {
7473 		/* Writes still need the seq_file to hold the private data */
7474 		ret = -ENOMEM;
7475 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7476 		if (!m)
7477 			goto out;
7478 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7479 		if (!iter) {
7480 			kfree(m);
7481 			goto out;
7482 		}
7483 		ret = 0;
7484 
7485 		iter->tr = tr;
7486 		iter->array_buffer = &tr->max_buffer;
7487 		iter->cpu_file = tracing_get_cpu(inode);
7488 		m->private = iter;
7489 		file->private_data = m;
7490 	}
7491 out:
7492 	if (ret < 0)
7493 		trace_array_put(tr);
7494 
7495 	return ret;
7496 }
7497 
tracing_swap_cpu_buffer(void * tr)7498 static void tracing_swap_cpu_buffer(void *tr)
7499 {
7500 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7501 }
7502 
/*
 * write() handler for the snapshot file:
 *   0 - free the snapshot buffer (only valid on the all-CPU file)
 *   1 - allocate the snapshot buffer if needed and take a snapshot
 *       (swap main and max buffers, all CPUs or a single one)
 *   * - clear the snapshot buffer contents
 * Rejected with -EBUSY while the current tracer or a conditional
 * snapshot already owns the max buffer.
 */
static ssize_t
tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
		       loff_t *ppos)
{
	struct seq_file *m = filp->private_data;
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	unsigned long val;
	int ret;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	mutex_lock(&trace_types_lock);

	if (tr->current_trace->use_max_tr) {
		ret = -EBUSY;
		goto out;
	}

	/* max_lock is an arch spinlock; irqs must be off while holding it */
	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	if (tr->cond_snapshot)
		ret = -EBUSY;
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();
	if (ret)
		goto out;

	switch (val) {
	case 0:
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
			ret = -EINVAL;
			break;
		}
		if (tr->allocated_snapshot)
			free_snapshot(tr);
		break;
	case 1:
/* Only allow per-cpu swap if the ring buffer supports it */
#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
			ret = -EINVAL;
			break;
		}
#endif
		if (tr->allocated_snapshot)
			ret = resize_buffer_duplicate_size(&tr->max_buffer,
					&tr->array_buffer, iter->cpu_file);
		else
			ret = tracing_alloc_snapshot_instance(tr);
		if (ret < 0)
			break;
		/* Now, we're going to swap */
		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
			local_irq_disable();
			update_max_tr(tr, current, smp_processor_id(), NULL);
			local_irq_enable();
		} else {
			/* Per-cpu swap must run on the target CPU itself */
			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
						 (void *)tr, 1);
		}
		break;
	default:
		if (tr->allocated_snapshot) {
			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
				tracing_reset_online_cpus(&tr->max_buffer);
			else
				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
		}
		break;
	}

	if (ret >= 0) {
		*ppos += cnt;
		ret = cnt;
	}
out:
	mutex_unlock(&trace_types_lock);
	return ret;
}
7589 
tracing_snapshot_release(struct inode * inode,struct file * file)7590 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7591 {
7592 	struct seq_file *m = file->private_data;
7593 	int ret;
7594 
7595 	ret = tracing_release(inode, file);
7596 
7597 	if (file->f_mode & FMODE_READ)
7598 		return ret;
7599 
7600 	/* If write only, the seq_file is just a stub */
7601 	if (m)
7602 		kfree(m->private);
7603 	kfree(m);
7604 
7605 	return 0;
7606 }
7607 
7608 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7609 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7610 				    size_t count, loff_t *ppos);
7611 static int tracing_buffers_release(struct inode *inode, struct file *file);
7612 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7613 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7614 
snapshot_raw_open(struct inode * inode,struct file * filp)7615 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7616 {
7617 	struct ftrace_buffer_info *info;
7618 	int ret;
7619 
7620 	/* The following checks for tracefs lockdown */
7621 	ret = tracing_buffers_open(inode, filp);
7622 	if (ret < 0)
7623 		return ret;
7624 
7625 	info = filp->private_data;
7626 
7627 	if (info->iter.trace->use_max_tr) {
7628 		tracing_buffers_release(inode, filp);
7629 		return -EBUSY;
7630 	}
7631 
7632 	info->iter.snapshot = true;
7633 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7634 
7635 	return ret;
7636 }
7637 
7638 #endif /* CONFIG_TRACER_SNAPSHOT */
7639 
7640 
/* tracefs file_operations tables for the control files implemented above */
static const struct file_operations tracing_thresh_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_thresh_read,
	.write		= tracing_thresh_write,
	.llseek		= generic_file_llseek,
};

#ifdef CONFIG_TRACER_MAX_TRACE
static const struct file_operations tracing_max_lat_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_max_lat_read,
	.write		= tracing_max_lat_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
#endif

static const struct file_operations set_tracer_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_set_trace_read,
	.write		= tracing_set_trace_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_pipe_fops = {
	.open		= tracing_open_pipe,
	.poll		= tracing_poll_pipe,
	.read		= tracing_read_pipe,
	.splice_read	= tracing_splice_read_pipe,
	.release	= tracing_release_pipe,
	.llseek		= no_llseek,
};

static const struct file_operations tracing_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_entries_read,
	.write		= tracing_entries_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_total_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_total_entries_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_free_buffer_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_free_buffer_write,
	.release	= tracing_free_buffer_release,
};

static const struct file_operations tracing_mark_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_write,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_mark_raw_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_raw_write,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations trace_clock_fops = {
	.open		= tracing_clock_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_clock_write,
};

static const struct file_operations trace_time_stamp_mode_fops = {
	.open		= tracing_time_stamp_mode_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
};

#ifdef CONFIG_TRACER_SNAPSHOT
static const struct file_operations snapshot_fops = {
	.open		= tracing_snapshot_open,
	.read		= seq_read,
	.write		= tracing_snapshot_write,
	.llseek		= tracing_lseek,
	.release	= tracing_snapshot_release,
};

static const struct file_operations snapshot_raw_fops = {
	.open		= snapshot_raw_open,
	.read		= tracing_buffers_read,
	.release	= tracing_buffers_release,
	.splice_read	= tracing_buffers_splice_read,
	.llseek		= no_llseek,
};

#endif /* CONFIG_TRACER_SNAPSHOT */
7741 
/*
 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
 * @filp: The active open file structure
 * @ubuf: The userspace provided buffer holding the value to write
 * @cnt: The number of bytes available in @ubuf
 * @ppos: The current "file" position
 *
 * This function implements the write interface for a struct trace_min_max_param.
 * The filp->private_data must point to a trace_min_max_param structure that
 * defines where to write the value, the min and the max acceptable values,
 * and a lock to protect the write.
 *
 * Returns @cnt on success, or a negative error code (-EFAULT, parse
 * errors from kstrtoull_from_user(), or -EINVAL for out-of-range values).
 */
static ssize_t
trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct trace_min_max_param *param = filp->private_data;
	u64 val;
	int err;

	if (!param)
		return -EFAULT;

	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
	if (err)
		return err;

	/* The lock is optional; only taken when the param supplies one */
	if (param->lock)
		mutex_lock(param->lock);

	if (param->min && val < *param->min)
		err = -EINVAL;

	if (param->max && val > *param->max)
		err = -EINVAL;

	if (!err)
		*param->val = val;

	if (param->lock)
		mutex_unlock(param->lock);

	if (err)
		return err;

	return cnt;
}
7788 
/*
 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
 * @filp: The active open file structure
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * This function implements the read interface for a struct trace_min_max_param.
 * The filp->private_data must point to a trace_min_max_param struct with valid
 * data.
 *
 * Returns the number of bytes copied to user space.
 */
static ssize_t
trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct trace_min_max_param *param = filp->private_data;
	char buf[U64_STR_SIZE];
	int len;
	u64 val;

	if (!param)
		return -EFAULT;

	val = *param->val;

	/* Never copy out more than the formatting buffer holds */
	if (cnt > sizeof(buf))
		cnt = sizeof(buf);

	len = snprintf(buf, sizeof(buf), "%llu\n", val);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
}
7820 
/* File operations for tracefs files backed by a trace_min_max_param */
const struct file_operations trace_min_max_fops = {
	.open		= tracing_open_generic,
	.read		= trace_min_max_read,
	.write		= trace_min_max_write,
};
7826 
#define TRACING_LOG_ERRS_MAX	8	/* max error-log entries kept per trace instance */
#define TRACING_LOG_LOC_MAX	128	/* max length of the saved error-location string */

/* Prefix printed before the offending command in the error log */
#define CMD_PREFIX "  Command: "

/* Describes which error string applies and where the caret points */
struct err_info {
	const char	**errs;	/* ptr to loc-specific array of err strings */
	u8		type;	/* index into errs -> specific err string */
	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
	u64		ts;	/* presumably the time the error was logged — confirm at the fill site */
};
7838 
7839 struct tracing_log_err {
7840 	struct list_head	list;
7841 	struct err_info		info;
7842 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7843 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7844 };
7845 
7846 static DEFINE_MUTEX(tracing_err_log_lock);
7847 
get_tracing_log_err(struct trace_array * tr)7848 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7849 {
7850 	struct tracing_log_err *err;
7851 
7852 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7853 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7854 		if (!err)
7855 			err = ERR_PTR(-ENOMEM);
7856 		else
7857 			tr->n_err_log_entries++;
7858 
7859 		return err;
7860 	}
7861 
7862 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7863 	list_del(&err->list);
7864 
7865 	return err;
7866 }
7867 
/**
 * err_pos - find the position of a string within a command for error careting
 * @cmd: The tracing command that caused the error
 * @str: The string to position the caret at within @cmd
 *
 * Finds the position of the first occurrence of @str within @cmd.  The
 * return value can be passed to tracing_log_err() for caret placement
 * within @cmd.
 *
 * Returns the index within @cmd of the first occurrence of @str or 0
 * if @str was not found.
 */
unsigned int err_pos(char *cmd, const char *str)
{
	char *found;

	/* An empty command is a caller bug; caret at 0 in that case. */
	if (WARN_ON(!strlen(cmd)))
		return 0;

	found = strstr(cmd, str);

	return found ? found - cmd : 0;
}
7893 
/**
 * tracing_log_err - write an error to the tracing error log
 * @tr: The associated trace array for the error (NULL for top level array)
 * @loc: A string describing where the error occurred
 * @cmd: The tracing command that caused the error
 * @errs: The array of loc-specific static error strings
 * @type: The index into errs[], which produces the specific static err string
 * @pos: The position the caret should be placed in the cmd
 *
 * Writes an error into tracing/error_log of the form:
 *
 * <loc>: error: <text>
 *   Command: <cmd>
 *              ^
 *
 * tracing/error_log is a small log file containing the last
 * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
 * unless there has been a tracing error, and the error log can be
 * cleared and have its memory freed by writing the empty string in
 * truncation mode to it i.e. echo > tracing/error_log.
 *
 * NOTE: the @errs array along with the @type param are used to
 * produce a static error string - this string is not copied and saved
 * when the error is logged - only a pointer to it is saved.  See
 * existing callers for examples of how static strings are typically
 * defined for use with tracing_log_err().
 */
void tracing_log_err(struct trace_array *tr,
		     const char *loc, const char *cmd,
		     const char **errs, u8 type, u8 pos)
{
	struct tracing_log_err *err;

	if (!tr)
		tr = &global_trace;

	mutex_lock(&tracing_err_log_lock);
	/*
	 * get_tracing_log_err() only fails with ERR_PTR(-ENOMEM); on
	 * failure the error is silently dropped (best-effort logging).
	 */
	err = get_tracing_log_err(tr);
	if (PTR_ERR(err) == -ENOMEM) {
		mutex_unlock(&tracing_err_log_lock);
		return;
	}

	/* Copy loc/cmd into the entry; @errs itself is only referenced. */
	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
	snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);

	err->info.errs = errs;
	err->info.type = type;
	err->info.pos = pos;
	err->info.ts = local_clock();

	list_add_tail(&err->list, &tr->err_log);
	mutex_unlock(&tracing_err_log_lock);
}
7948 
clear_tracing_err_log(struct trace_array * tr)7949 static void clear_tracing_err_log(struct trace_array *tr)
7950 {
7951 	struct tracing_log_err *err, *next;
7952 
7953 	mutex_lock(&tracing_err_log_lock);
7954 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7955 		list_del(&err->list);
7956 		kfree(err);
7957 	}
7958 
7959 	tr->n_err_log_entries = 0;
7960 	mutex_unlock(&tracing_err_log_lock);
7961 }
7962 
/* seq_file ->start(): take the log lock (held until ->stop()). */
static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;

	mutex_lock(&tracing_err_log_lock);
	return seq_list_start(&tr->err_log, *pos);
}
7971 
/* seq_file ->next(): advance to the next logged error. */
static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;

	return seq_list_next(v, &tr->err_log, pos);
}
7978 
/* seq_file ->stop(): drop the lock taken in ->start(). */
static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&tracing_err_log_lock);
}
7983 
/*
 * Emit the caret line: pad past CMD_PREFIX plus @pos columns, then "^".
 * The index is wider than u8 so the summed bound cannot wrap.
 */
static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
{
	unsigned int i;

	for (i = 0; i < sizeof(CMD_PREFIX) - 1 + pos; i++)
		seq_putc(m, ' ');
	seq_puts(m, "^\n");
}
7994 
/* seq_file ->show(): print one error entry plus its caret line. */
static int tracing_err_log_seq_show(struct seq_file *m, void *v)
{
	struct tracing_log_err *err = v;

	if (err) {
		const char *err_text = err->info.errs[err->info.type];
		u64 sec = err->info.ts;
		u32 nsec;

		/* do_div() leaves seconds in sec, returns the ns remainder. */
		nsec = do_div(sec, NSEC_PER_SEC);
		/* nsec / 1000 gives microseconds, matching the %06u width. */
		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
			   err->loc, err_text);
		seq_printf(m, "%s", err->cmd);
		tracing_err_log_show_pos(m, err->info.pos);
	}

	return 0;
}
8013 
/* seq_file iterator over a trace_array's error log. */
static const struct seq_operations tracing_err_log_seq_ops = {
	.start  = tracing_err_log_seq_start,
	.next   = tracing_err_log_seq_next,
	.stop   = tracing_err_log_seq_stop,
	.show   = tracing_err_log_seq_show
};
8020 
/*
 * Open tracing/error_log.  Takes a reference on the trace array;
 * the reference is dropped either here on seq_open() failure or in
 * tracing_err_log_release().
 */
static int tracing_err_log_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	int ret = 0;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/* If this file was opened for write, then erase contents */
	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
		clear_tracing_err_log(tr);

	if (file->f_mode & FMODE_READ) {
		ret = seq_open(file, &tracing_err_log_seq_ops);
		if (!ret) {
			struct seq_file *m = file->private_data;
			m->private = tr;
		} else {
			trace_array_put(tr);
		}
	}
	return ret;
}
8045 
/*
 * Writes to error_log are accepted and discarded; only opening with
 * O_TRUNC has an effect (the log is cleared at open time).
 */
static ssize_t tracing_err_log_write(struct file *file,
				     const char __user *buffer,
				     size_t count, loff_t *ppos)
{
	return count;
}
8052 
tracing_err_log_release(struct inode * inode,struct file * file)8053 static int tracing_err_log_release(struct inode *inode, struct file *file)
8054 {
8055 	struct trace_array *tr = inode->i_private;
8056 
8057 	trace_array_put(tr);
8058 
8059 	if (file->f_mode & FMODE_READ)
8060 		seq_release(inode, file);
8061 
8062 	return 0;
8063 }
8064 
/* File operations for tracing/error_log. */
static const struct file_operations tracing_err_log_fops = {
	.open           = tracing_err_log_open,
	.write		= tracing_err_log_write,
	.read           = seq_read,
	.llseek         = tracing_lseek,
	.release        = tracing_err_log_release,
};
8072 
/*
 * Open trace_pipe_raw: set up a per-open ftrace_buffer_info iterator
 * over the per-cpu (or all-cpu) ring buffer.  Takes a trace_array
 * reference and bumps tr->trace_ref while the file is open.
 */
static int tracing_buffers_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct ftrace_buffer_info *info;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	info = kvzalloc(sizeof(*info), GFP_KERNEL);
	if (!info) {
		trace_array_put(tr);
		return -ENOMEM;
	}

	mutex_lock(&trace_types_lock);

	info->iter.tr		= tr;
	info->iter.cpu_file	= tracing_get_cpu(inode);
	info->iter.trace	= tr->current_trace;
	info->iter.array_buffer = &tr->array_buffer;
	info->spare		= NULL;
	/* Force reading ring buffer for first read */
	info->read		= (unsigned int)-1;

	filp->private_data = info;

	/* Prevents buffer resizing/freeing while this reader exists. */
	tr->trace_ref++;

	mutex_unlock(&trace_types_lock);

	ret = nonseekable_open(inode, filp);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
8111 
8112 static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)8113 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8114 {
8115 	struct ftrace_buffer_info *info = filp->private_data;
8116 	struct trace_iterator *iter = &info->iter;
8117 
8118 	return trace_poll(iter, filp, poll_table);
8119 }
8120 
/*
 * Read raw ring-buffer pages from trace_pipe_raw.  A "spare" page is
 * lazily allocated per open file; ring_buffer_read_page() swaps data
 * into it, and leftover bytes (tracked via info->read) are handed out
 * on subsequent reads before the next page is pulled.
 */
static ssize_t
tracing_buffers_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	ssize_t ret = 0;
	ssize_t size;

	if (!count)
		return 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	if (!info->spare) {
		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
							  iter->cpu_file);
		if (IS_ERR(info->spare)) {
			ret = PTR_ERR(info->spare);
			info->spare = NULL;
		} else {
			info->spare_cpu = iter->cpu_file;
		}
	}
	if (!info->spare)
		return ret;

	/* Do we have previous read data to read? */
	if (info->read < PAGE_SIZE)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->array_buffer->buffer,
				    &info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		/* Nothing available: block (or -EAGAIN) and retry. */
		if (trace_empty(iter)) {
			if ((filp->f_flags & O_NONBLOCK))
				return -EAGAIN;

			ret = wait_on_pipe(iter, 0);
			if (ret)
				return ret;

			goto again;
		}
		return 0;
	}

	info->read = 0;
 read:
	size = PAGE_SIZE - info->read;
	if (size > count)
		size = count;

	/* copy_to_user() returns the number of bytes NOT copied. */
	ret = copy_to_user(ubuf, info->spare + info->read, size);
	if (ret == size)
		return -EFAULT;

	size -= ret;

	*ppos += size;
	info->read += size;

	return size;
}
8194 
/*
 * Release trace_pipe_raw: drop the tr->trace_ref reader count, return
 * the spare page to the ring buffer, and free the per-open info.
 */
static int tracing_buffers_release(struct inode *inode, struct file *file)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	mutex_lock(&trace_types_lock);

	iter->tr->trace_ref--;

	__trace_array_put(iter->tr);

	if (info->spare)
		ring_buffer_free_read_page(iter->array_buffer->buffer,
					   info->spare_cpu, info->spare);
	kvfree(info);

	mutex_unlock(&trace_types_lock);

	return 0;
}
8215 
/* Refcounted handle on one ring-buffer page spliced into a pipe. */
struct buffer_ref {
	struct trace_buffer	*buffer;	/* buffer the page came from */
	void			*page;		/* the read page itself */
	int			cpu;		/* cpu buffer the page belongs to */
	refcount_t		refcount;
};
8222 
buffer_ref_release(struct buffer_ref * ref)8223 static void buffer_ref_release(struct buffer_ref *ref)
8224 {
8225 	if (!refcount_dec_and_test(&ref->refcount))
8226 		return;
8227 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8228 	kfree(ref);
8229 }
8230 
buffer_pipe_buf_release(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8231 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8232 				    struct pipe_buffer *buf)
8233 {
8234 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8235 
8236 	buffer_ref_release(ref);
8237 	buf->private = 0;
8238 }
8239 
/*
 * Pipe buffer ->get(): take another reference on the page.  Refuses
 * once the count nears INT_MAX to guard against refcount overflow.
 */
static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
				struct pipe_buffer *buf)
{
	struct buffer_ref *ref = (struct buffer_ref *)buf->private;

	if (refcount_read(&ref->refcount) > INT_MAX/2)
		return false;

	refcount_inc(&ref->refcount);
	return true;
}
8251 
/* Pipe buffer operations for a buffer. */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
	.release		= buffer_pipe_buf_release,
	.get			= buffer_pipe_buf_get,
};
8257 
/*
 * Callback from splice_to_pipe(), if we need to release some pages
 * at the end of the spd in case we error'ed out in filling the pipe.
 */
static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
	struct buffer_ref *ref =
		(struct buffer_ref *)spd->partial[i].private;

	buffer_ref_release(ref);
	spd->partial[i].private = 0;
}
8270 
/*
 * Zero-copy splice of ring-buffer pages into a pipe.  For each page:
 * allocate a buffer_ref, swap a full ring-buffer page into it, and
 * hand the page to the pipe; the pipe's ->release() drops the ref.
 * Blocks (honoring buffer_percent) when nothing is available unless
 * the file or splice call is non-blocking.
 */
static ssize_t
tracing_buffers_splice_read(struct file *file, loff_t *ppos,
			    struct pipe_inode_info *pipe, size_t len,
			    unsigned int flags)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &buffer_pipe_buf_ops,
		.spd_release	= buffer_spd_release,
	};
	struct buffer_ref *ref;
	int entries, i;
	ssize_t ret = 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	/* Only whole, page-aligned transfers are supported. */
	if (*ppos & (PAGE_SIZE - 1))
		return -EINVAL;

	if (len & (PAGE_SIZE - 1)) {
		if (len < PAGE_SIZE)
			return -EINVAL;
		len &= PAGE_MASK;
	}

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

 again:
	trace_access_lock(iter->cpu_file);
	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);

	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
		struct page *page;
		int r;

		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
		if (!ref) {
			ret = -ENOMEM;
			break;
		}

		refcount_set(&ref->refcount, 1);
		ref->buffer = iter->array_buffer->buffer;
		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
		if (IS_ERR(ref->page)) {
			ret = PTR_ERR(ref->page);
			ref->page = NULL;
			kfree(ref);
			break;
		}
		ref->cpu = iter->cpu_file;

		/* full_page=1: only consume when a whole page is ready. */
		r = ring_buffer_read_page(ref->buffer, &ref->page,
					  len, iter->cpu_file, 1);
		if (r < 0) {
			ring_buffer_free_read_page(ref->buffer, ref->cpu,
						   ref->page);
			kfree(ref);
			break;
		}

		page = virt_to_page(ref->page);

		spd.pages[i] = page;
		spd.partial[i].len = PAGE_SIZE;
		spd.partial[i].offset = 0;
		spd.partial[i].private = (unsigned long)ref;
		spd.nr_pages++;
		*ppos += PAGE_SIZE;

		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
	}

	trace_access_unlock(iter->cpu_file);
	spd.nr_pages = i;

	/* did we read anything? */
	if (!spd.nr_pages) {
		if (ret)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
		if (ret)
			goto out;

		/* No need to wait after waking up when tracing is off */
		if (!tracer_tracing_is_on(iter->tr))
			goto out;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
out:
	splice_shrink_spd(&spd);

	return ret;
}
8383 
/* File operations for per-cpu trace_pipe_raw. */
static const struct file_operations tracing_buffers_fops = {
	.open		= tracing_buffers_open,
	.read		= tracing_buffers_read,
	.poll		= tracing_buffers_poll,
	.release	= tracing_buffers_release,
	.splice_read	= tracing_buffers_splice_read,
	.llseek		= no_llseek,
};
8392 
/*
 * Read per-cpu buffer statistics (entries, overruns, byte counts,
 * timestamps, dropped/read events) into a trace_seq and copy the
 * formatted text to userspace.
 */
static ssize_t
tracing_stats_read(struct file *filp, char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct trace_array *tr = inode->i_private;
	struct array_buffer *trace_buf = &tr->array_buffer;
	int cpu = tracing_get_cpu(inode);
	struct trace_seq *s;
	unsigned long cnt;
	unsigned long long t;
	unsigned long usec_rem;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "entries: %ld\n", cnt);

	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "overrun: %ld\n", cnt);

	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "commit overrun: %ld\n", cnt);

	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "bytes: %ld\n", cnt);

	if (trace_clocks[tr->clock_id].in_ns) {
		/* local or global for trace_clock */
		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
		/* do_div() leaves seconds in t, returns the usec remainder. */
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
								t, usec_rem);

		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
	} else {
		/* counter or tsc mode for trace_clock */
		trace_seq_printf(s, "oldest event ts: %llu\n",
				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));

		trace_seq_printf(s, "now ts: %llu\n",
				ring_buffer_time_stamp(trace_buf->buffer));
	}

	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "dropped events: %ld\n", cnt);

	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "read events: %ld\n", cnt);

	count = simple_read_from_buffer(ubuf, count, ppos,
					s->buffer, trace_seq_used(s));

	kfree(s);

	return count;
}
8456 
/* File operations for per_cpu/cpuN/stats. */
static const struct file_operations tracing_stats_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_stats_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
8463 
8464 #ifdef CONFIG_DYNAMIC_FTRACE
8465 
/*
 * Read dyn_ftrace_total_info: the number of patched call sites plus
 * the pages/groups ftrace allocated to record them.
 */
static ssize_t
tracing_read_dyn_info(struct file *filp, char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	ssize_t ret;
	char *buf;
	int r;

	/* 256 should be plenty to hold the amount needed */
	buf = kmalloc(256, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
		      ftrace_update_tot_cnt,
		      ftrace_number_of_pages,
		      ftrace_number_of_groups);

	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
	kfree(buf);
	return ret;
}
8488 
/* File operations for dyn_ftrace_total_info (read-only). */
static const struct file_operations tracing_dyn_info_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_read_dyn_info,
	.llseek		= generic_file_llseek,
};
8494 #endif /* CONFIG_DYNAMIC_FTRACE */
8495 
8496 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/* Probe callback for "func:snapshot": take a snapshot on every hit. */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
		struct trace_array *tr, struct ftrace_probe_ops *ops,
		void *data)
{
	tracing_snapshot_instance(tr);
}
8504 
8505 static void
ftrace_count_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8506 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8507 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8508 		      void *data)
8509 {
8510 	struct ftrace_func_mapper *mapper = data;
8511 	long *count = NULL;
8512 
8513 	if (mapper)
8514 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8515 
8516 	if (count) {
8517 
8518 		if (*count <= 0)
8519 			return;
8520 
8521 		(*count)--;
8522 	}
8523 
8524 	tracing_snapshot_instance(tr);
8525 }
8526 
8527 static int
ftrace_snapshot_print(struct seq_file * m,unsigned long ip,struct ftrace_probe_ops * ops,void * data)8528 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8529 		      struct ftrace_probe_ops *ops, void *data)
8530 {
8531 	struct ftrace_func_mapper *mapper = data;
8532 	long *count = NULL;
8533 
8534 	seq_printf(m, "%ps:", (void *)ip);
8535 
8536 	seq_puts(m, "snapshot");
8537 
8538 	if (mapper)
8539 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8540 
8541 	if (count)
8542 		seq_printf(m, ":count=%ld\n", *count);
8543 	else
8544 		seq_puts(m, ":unlimited\n");
8545 
8546 	return 0;
8547 }
8548 
8549 static int
ftrace_snapshot_init(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * init_data,void ** data)8550 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8551 		     unsigned long ip, void *init_data, void **data)
8552 {
8553 	struct ftrace_func_mapper *mapper = *data;
8554 
8555 	if (!mapper) {
8556 		mapper = allocate_ftrace_func_mapper();
8557 		if (!mapper)
8558 			return -ENOMEM;
8559 		*data = mapper;
8560 	}
8561 
8562 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8563 }
8564 
/*
 * Probe ->free(): an ip of 0 means "tear everything down" (free the
 * whole mapper); otherwise remove just the entry for @ip.
 */
static void
ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
		     unsigned long ip, void *data)
{
	struct ftrace_func_mapper *mapper = data;

	if (!ip) {
		if (!mapper)
			return;
		free_ftrace_func_mapper(mapper, NULL);
		return;
	}

	ftrace_func_mapper_remove_ip(mapper, ip);
}
8580 
/* Unlimited variant: no per-ip state, so no init/free needed. */
static struct ftrace_probe_ops snapshot_probe_ops = {
	.func			= ftrace_snapshot,
	.print			= ftrace_snapshot_print,
};

/* Counted variant: keeps a per-ip remaining count in a func mapper. */
static struct ftrace_probe_ops snapshot_count_probe_ops = {
	.func			= ftrace_count_snapshot,
	.print			= ftrace_snapshot_print,
	.init			= ftrace_snapshot_init,
	.free			= ftrace_snapshot_free,
};
8592 
/*
 * Handler for "<glob>:snapshot[:count]" written to set_ftrace_filter.
 * A leading '!' unregisters the probe; an optional ":count" parameter
 * selects the counted probe ops with the parsed limit.
 */
static int
ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
			       char *glob, char *cmd, char *param, int enable)
{
	struct ftrace_probe_ops *ops;
	void *count = (void *)-1;	/* -1 == unlimited */
	char *number;
	int ret;

	if (!tr)
		return -ENODEV;

	/* hash funcs only work with set_ftrace_filter */
	if (!enable)
		return -EINVAL;

	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;

	if (glob[0] == '!')
		return unregister_ftrace_function_probe_func(glob+1, tr, ops);

	if (!param)
		goto out_reg;

	number = strsep(&param, ":");

	/* Bare ":" with no number behaves like no count at all. */
	if (!strlen(number))
		goto out_reg;

	/*
	 * We use the callback data field (which is a pointer)
	 * as our counter.
	 */
	ret = kstrtoul(number, 0, (unsigned long *)&count);
	if (ret)
		return ret;

 out_reg:
	/* Make sure the snapshot buffer exists before arming the probe. */
	ret = tracing_alloc_snapshot_instance(tr);
	if (ret < 0)
		goto out;

	ret = register_ftrace_function_probe(glob, tr, ops, count);

 out:
	return ret < 0 ? ret : 0;
}
8640 
/* The "snapshot" command usable in set_ftrace_filter. */
static struct ftrace_func_command ftrace_snapshot_cmd = {
	.name			= "snapshot",
	.func			= ftrace_trace_snapshot_callback,
};

static __init int register_snapshot_cmd(void)
{
	return register_ftrace_command(&ftrace_snapshot_cmd);
}
#else
/* No snapshot command without snapshot + dynamic ftrace support. */
static inline __init int register_snapshot_cmd(void) { return 0; }
#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8653 
tracing_get_dentry(struct trace_array * tr)8654 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8655 {
8656 	if (WARN_ON(!tr->dir))
8657 		return ERR_PTR(-ENODEV);
8658 
8659 	/* Top directory uses NULL as the parent */
8660 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8661 		return NULL;
8662 
8663 	/* All sub buffers have a descriptor */
8664 	return tr->dir;
8665 }
8666 
/*
 * Return (creating on first use) the "per_cpu" directory for @tr.
 * Returns NULL if the parent dentry could not be resolved or the
 * directory could not be created.
 */
static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
{
	struct dentry *d_tracer;

	if (tr->percpu_dir)
		return tr->percpu_dir;

	d_tracer = tracing_get_dentry(tr);
	if (IS_ERR(d_tracer))
		return NULL;

	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);

	MEM_FAIL(!tr->percpu_dir,
		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);

	return tr->percpu_dir;
}
8685 
/*
 * Create a tracefs file and stash the cpu number (+1 so cpu 0 is
 * distinguishable from "no cpu") in the inode's i_cdev for later
 * retrieval by tracing_get_cpu().
 */
static struct dentry *
trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
		      void *data, long cpu, const struct file_operations *fops)
{
	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);

	if (ret) /* See tracing_get_cpu() */
		d_inode(ret)->i_cdev = (void *)(cpu + 1);
	return ret;
}
8696 
/*
 * Populate per_cpu/cpu<N> for @tr with the per-cpu view files
 * (trace, trace_pipe, trace_pipe_raw, stats, buffer_size_kb and,
 * when configured, the snapshot files).
 */
static void
tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
{
	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
	struct dentry *d_cpu;
	char cpu_dir[30]; /* 30 characters should be more than enough */

	if (!d_percpu)
		return;

	snprintf(cpu_dir, 30, "cpu%ld", cpu);
	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
	if (!d_cpu) {
		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
		return;
	}

	/* per cpu trace_pipe */
	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
				tr, cpu, &tracing_pipe_fops);

	/* per cpu trace */
	trace_create_cpu_file("trace", 0644, d_cpu,
				tr, cpu, &tracing_fops);

	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
				tr, cpu, &tracing_buffers_fops);

	trace_create_cpu_file("stats", 0444, d_cpu,
				tr, cpu, &tracing_stats_fops);

	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
				tr, cpu, &tracing_entries_fops);

#ifdef CONFIG_TRACER_SNAPSHOT
	trace_create_cpu_file("snapshot", 0644, d_cpu,
				tr, cpu, &snapshot_fops);

	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
				tr, cpu, &snapshot_raw_fops);
#endif
}
8739 
8740 #ifdef CONFIG_FTRACE_SELFTEST
8741 /* Let selftest have access to static functions in this file */
8742 #include "trace_selftest.c"
8743 #endif
8744 
8745 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8746 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8747 			loff_t *ppos)
8748 {
8749 	struct trace_option_dentry *topt = filp->private_data;
8750 	char *buf;
8751 
8752 	if (topt->flags->val & topt->opt->bit)
8753 		buf = "1\n";
8754 	else
8755 		buf = "0\n";
8756 
8757 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8758 }
8759 
/*
 * Write "0" or "1" to a tracer-specific option file, flipping the
 * option via __set_tracer_option() only when the value changes.
 */
static ssize_t
trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
			 loff_t *ppos)
{
	struct trace_option_dentry *topt = filp->private_data;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	/* Only touch the option when its current state differs. */
	if (!!(topt->flags->val & topt->opt->bit) != val) {
		mutex_lock(&trace_types_lock);
		/* Last arg is "neg": !val means "clear" when val == 0. */
		ret = __set_tracer_option(topt->tr, topt->flags,
					  topt->opt, !val);
		mutex_unlock(&trace_types_lock);
		if (ret)
			return ret;
	}

	*ppos += cnt;

	return cnt;
}
8788 
tracing_open_options(struct inode * inode,struct file * filp)8789 static int tracing_open_options(struct inode *inode, struct file *filp)
8790 {
8791 	struct trace_option_dentry *topt = inode->i_private;
8792 	int ret;
8793 
8794 	ret = tracing_check_open_get_tr(topt->tr);
8795 	if (ret)
8796 		return ret;
8797 
8798 	filp->private_data = inode->i_private;
8799 	return 0;
8800 }
8801 
/* Release a tracer option file: drop the reference taken at open. */
static int tracing_release_options(struct inode *inode, struct file *file)
{
	struct trace_option_dentry *topt = file->private_data;

	trace_array_put(topt->tr);
	return 0;
}
8809 
/* File operations for tracer-specific option files (options/<opt>). */
static const struct file_operations trace_options_fops = {
	.open = tracing_open_options,
	.read = trace_options_read,
	.write = trace_options_write,
	.llseek	= generic_file_llseek,
	.release = tracing_release_options,
};
8817 
/*
 * In order to pass in both the trace_array descriptor as well as the index
 * to the flag that the trace option file represents, the trace_array
 * has a character array of trace_flags_index[], which holds the index
 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
 * The address of this character array is passed to the flag option file
 * read/write callbacks.
 *
 * In order to extract both the index and the trace_array descriptor,
 * get_tr_index() uses the following algorithm.
 *
 *   idx = *ptr;
 *
 * As the pointer itself contains the address of the index (remember
 * index[1] == 1).
 *
 * Then to get the trace_array descriptor, by subtracting that index
 * from the ptr, we get to the start of the index itself.
 *
 *   ptr - idx == &index[0]
 *
 * Then a simple container_of() from that pointer gets us to the
 * trace_array descriptor.
 */
static void get_tr_index(void *data, struct trace_array **ptr,
			 unsigned int *pindex)
{
	*pindex = *(unsigned char *)data;

	/* void * arithmetic is a GCC extension (byte-sized), used kernel-wide. */
	*ptr = container_of(data - *pindex, struct trace_array,
			    trace_flags_index);
}
8850 
8851 static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8852 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8853 			loff_t *ppos)
8854 {
8855 	void *tr_index = filp->private_data;
8856 	struct trace_array *tr;
8857 	unsigned int index;
8858 	char *buf;
8859 
8860 	get_tr_index(tr_index, &tr, &index);
8861 
8862 	if (tr->trace_flags & (1 << index))
8863 		buf = "1\n";
8864 	else
8865 		buf = "0\n";
8866 
8867 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8868 }
8869 
/*
 * Write handler for a core trace flag file: accepts only "0" or "1"
 * and flips the corresponding bit via set_tracer_flag().
 */
static ssize_t
trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
			 loff_t *ppos)
{
	void *tr_index = filp->private_data;
	struct trace_array *tr;
	unsigned int index;
	unsigned long val;
	int ret;

	/* Recover the trace array and flag bit from the index pointer. */
	get_tr_index(tr_index, &tr, &index);

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	/*
	 * Lock order: event_mutex before trace_types_lock, matching the
	 * rest of the tracing code.
	 */
	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);
	ret = set_tracer_flag(tr, 1 << index, val);
	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}
8902 
/* File operations for the core (tracer-independent) option files. */
static const struct file_operations trace_options_core_fops = {
	.open = tracing_open_generic,
	.read = trace_options_core_read,
	.write = trace_options_core_write,
	.llseek = generic_file_llseek,
};
8909 
/*
 * Wrapper around tracefs_create_file() that warns on failure instead
 * of making every caller check.  Returns the new dentry or NULL.
 */
struct dentry *trace_create_file(const char *name,
				 umode_t mode,
				 struct dentry *parent,
				 void *data,
				 const struct file_operations *fops)
{
	struct dentry *dentry;

	dentry = tracefs_create_file(name, mode, parent, data, fops);
	if (!dentry)
		pr_warn("Could not create tracefs '%s' entry\n", name);

	return dentry;
}
8924 
8925 
trace_options_init_dentry(struct trace_array * tr)8926 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8927 {
8928 	struct dentry *d_tracer;
8929 
8930 	if (tr->options)
8931 		return tr->options;
8932 
8933 	d_tracer = tracing_get_dentry(tr);
8934 	if (IS_ERR(d_tracer))
8935 		return NULL;
8936 
8937 	tr->options = tracefs_create_dir("options", d_tracer);
8938 	if (!tr->options) {
8939 		pr_warn("Could not create tracefs directory 'options'\n");
8940 		return NULL;
8941 	}
8942 
8943 	return tr->options;
8944 }
8945 
8946 static void
create_trace_option_file(struct trace_array * tr,struct trace_option_dentry * topt,struct tracer_flags * flags,struct tracer_opt * opt)8947 create_trace_option_file(struct trace_array *tr,
8948 			 struct trace_option_dentry *topt,
8949 			 struct tracer_flags *flags,
8950 			 struct tracer_opt *opt)
8951 {
8952 	struct dentry *t_options;
8953 
8954 	t_options = trace_options_init_dentry(tr);
8955 	if (!t_options)
8956 		return;
8957 
8958 	topt->flags = flags;
8959 	topt->opt = opt;
8960 	topt->tr = tr;
8961 
8962 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8963 				    &trace_options_fops);
8964 
8965 }
8966 
/*
 * Create an option file for every option in @tracer's flags on trace
 * array @tr, recording the allocated trace_option_dentry array in
 * tr->topts so it can be freed on instance removal.
 */
static void
create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
{
	struct trace_option_dentry *topts;
	struct trace_options *tr_topts;
	struct tracer_flags *flags;
	struct tracer_opt *opts;
	int cnt;
	int i;

	if (!tracer)
		return;

	flags = tracer->flags;

	if (!flags || !flags->opts)
		return;

	/*
	 * If this is an instance, only create flags for tracers
	 * the instance may have.
	 */
	if (!trace_ok_for_array(tracer, tr))
		return;

	for (i = 0; i < tr->nr_topts; i++) {
		/* Make sure there's no duplicate flags. */
		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
			return;
	}

	opts = flags->opts;

	/* Count the options; the opts array is terminated by a NULL name. */
	for (cnt = 0; opts[cnt].name; cnt++)
		;

	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
	if (!topts)
		return;

	/* Grow the per-array list of per-tracer option blocks by one. */
	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
			    GFP_KERNEL);
	if (!tr_topts) {
		kfree(topts);
		return;
	}

	tr->topts = tr_topts;
	tr->topts[tr->nr_topts].tracer = tracer;
	tr->topts[tr->nr_topts].topts = topts;
	tr->nr_topts++;

	for (cnt = 0; opts[cnt].name; cnt++) {
		create_trace_option_file(tr, &topts[cnt], flags,
					 &opts[cnt]);
		MEM_FAIL(topts[cnt].entry == NULL,
			  "Failed to create trace option: %s",
			  opts[cnt].name);
	}
}
9027 
9028 static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)9029 create_trace_option_core_file(struct trace_array *tr,
9030 			      const char *option, long index)
9031 {
9032 	struct dentry *t_options;
9033 
9034 	t_options = trace_options_init_dentry(tr);
9035 	if (!t_options)
9036 		return NULL;
9037 
9038 	return trace_create_file(option, 0644, t_options,
9039 				 (void *)&tr->trace_flags_index[index],
9040 				 &trace_options_core_fops);
9041 }
9042 
create_trace_options_dir(struct trace_array * tr)9043 static void create_trace_options_dir(struct trace_array *tr)
9044 {
9045 	struct dentry *t_options;
9046 	bool top_level = tr == &global_trace;
9047 	int i;
9048 
9049 	t_options = trace_options_init_dentry(tr);
9050 	if (!t_options)
9051 		return;
9052 
9053 	for (i = 0; trace_options[i]; i++) {
9054 		if (top_level ||
9055 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9056 			create_trace_option_core_file(tr, trace_options[i], i);
9057 	}
9058 }
9059 
9060 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9061 rb_simple_read(struct file *filp, char __user *ubuf,
9062 	       size_t cnt, loff_t *ppos)
9063 {
9064 	struct trace_array *tr = filp->private_data;
9065 	char buf[64];
9066 	int r;
9067 
9068 	r = tracer_tracing_is_on(tr);
9069 	r = sprintf(buf, "%d\n", r);
9070 
9071 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9072 }
9073 
/*
 * Write handler for "tracing_on": "1" turns the ring buffer on (and
 * calls the tracer's start method), "0" turns it off (calls stop and
 * wakes any blocked readers so they observe the state change).
 */
static ssize_t
rb_simple_write(struct file *filp, const char __user *ubuf,
		size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (buffer) {
		mutex_lock(&trace_types_lock);
		if (!!val == tracer_tracing_is_on(tr)) {
			val = 0; /* do nothing */
		} else if (val) {
			tracer_tracing_on(tr);
			if (tr->current_trace->start)
				tr->current_trace->start(tr);
		} else {
			tracer_tracing_off(tr);
			if (tr->current_trace->stop)
				tr->current_trace->stop(tr);
			/* Wake up any waiters */
			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
		}
		mutex_unlock(&trace_types_lock);
	}

	/* Writes always "succeed"; advance the position so cat-style use works. */
	(*ppos)++;

	return cnt;
}
9109 
/* File operations for the "tracing_on" control file. */
static const struct file_operations rb_simple_fops = {
	.open		= tracing_open_generic_tr,
	.read		= rb_simple_read,
	.write		= rb_simple_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};
9117 
9118 static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9119 buffer_percent_read(struct file *filp, char __user *ubuf,
9120 		    size_t cnt, loff_t *ppos)
9121 {
9122 	struct trace_array *tr = filp->private_data;
9123 	char buf[64];
9124 	int r;
9125 
9126 	r = tr->buffer_percent;
9127 	r = sprintf(buf, "%d\n", r);
9128 
9129 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9130 }
9131 
9132 static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9133 buffer_percent_write(struct file *filp, const char __user *ubuf,
9134 		     size_t cnt, loff_t *ppos)
9135 {
9136 	struct trace_array *tr = filp->private_data;
9137 	unsigned long val;
9138 	int ret;
9139 
9140 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9141 	if (ret)
9142 		return ret;
9143 
9144 	if (val > 100)
9145 		return -EINVAL;
9146 
9147 	tr->buffer_percent = val;
9148 
9149 	(*ppos)++;
9150 
9151 	return cnt;
9152 }
9153 
/* File operations for the "buffer_percent" control file. */
static const struct file_operations buffer_percent_fops = {
	.open		= tracing_open_generic_tr,
	.read		= buffer_percent_read,
	.write		= buffer_percent_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};
9161 
9162 static struct dentry *trace_instance_dir;
9163 
9164 static void
9165 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9166 
/*
 * Allocate the ring buffer and per-cpu bookkeeping for one
 * array_buffer.  Returns 0 on success or -ENOMEM with @buf fully
 * unwound on failure.
 */
static int
allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
{
	enum ring_buffer_flags rb_flags;

	/* Overwrite-oldest vs. stop-when-full, per the trace flags. */
	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;

	buf->tr = tr;

	buf->buffer = ring_buffer_alloc(size, rb_flags);
	if (!buf->buffer)
		return -ENOMEM;

	buf->data = alloc_percpu(struct trace_array_cpu);
	if (!buf->data) {
		ring_buffer_free(buf->buffer);
		buf->buffer = NULL;
		return -ENOMEM;
	}

	/* Allocate the first page for all buffers */
	/*
	 * NOTE(review): this records entries for tr->array_buffer even when
	 * @buf is the max/snapshot buffer -- matches upstream, but confirm
	 * it should not be operating on @buf here.
	 */
	set_buffer_entries(&tr->array_buffer,
			   ring_buffer_size(tr->array_buffer.buffer, 0));

	return 0;
}
9193 
/*
 * Allocate the main trace buffer for @tr and, when
 * CONFIG_TRACER_MAX_TRACE is set, the snapshot (max) buffer too.
 * Returns 0 or -ENOMEM with everything unwound.
 */
static int allocate_trace_buffers(struct trace_array *tr, int size)
{
	int ret;

	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
	if (ret)
		return ret;

#ifdef CONFIG_TRACER_MAX_TRACE
	/* Snapshot buffer starts minimal unless requested on the command line. */
	ret = allocate_trace_buffer(tr, &tr->max_buffer,
				    allocate_snapshot ? size : 1);
	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
		ring_buffer_free(tr->array_buffer.buffer);
		tr->array_buffer.buffer = NULL;
		free_percpu(tr->array_buffer.data);
		tr->array_buffer.data = NULL;
		return -ENOMEM;
	}
	tr->allocated_snapshot = allocate_snapshot;

	/*
	 * Only the top level trace array gets its snapshot allocated
	 * from the kernel command line.
	 */
	allocate_snapshot = false;
#endif

	return 0;
}
9223 
free_trace_buffer(struct array_buffer * buf)9224 static void free_trace_buffer(struct array_buffer *buf)
9225 {
9226 	if (buf->buffer) {
9227 		ring_buffer_free(buf->buffer);
9228 		buf->buffer = NULL;
9229 		free_percpu(buf->data);
9230 		buf->data = NULL;
9231 	}
9232 }
9233 
free_trace_buffers(struct trace_array * tr)9234 static void free_trace_buffers(struct trace_array *tr)
9235 {
9236 	if (!tr)
9237 		return;
9238 
9239 	free_trace_buffer(&tr->array_buffer);
9240 
9241 #ifdef CONFIG_TRACER_MAX_TRACE
9242 	free_trace_buffer(&tr->max_buffer);
9243 #endif
9244 }
9245 
/*
 * Seed trace_flags_index[] with the identity mapping (index[i] == i).
 * The option files hand out pointers into this array so get_tr_index()
 * can recover both the flag bit and the owning trace_array.
 */
static void init_trace_flags_index(struct trace_array *tr)
{
	int i;

	/* Used by the trace options files */
	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
		tr->trace_flags_index[i] = i;
}
9254 
/*
 * Create option files on @tr for every registered tracer.
 * Caller must hold trace_types_lock.
 */
static void __update_tracer_options(struct trace_array *tr)
{
	struct tracer *t;

	for (t = trace_types; t; t = t->next)
		add_tracer_options(tr, t);
}
9262 
/* Locked wrapper for __update_tracer_options(); also marks options as synced. */
static void update_tracer_options(struct trace_array *tr)
{
	mutex_lock(&trace_types_lock);
	tracer_options_updated = true;
	__update_tracer_options(tr);
	mutex_unlock(&trace_types_lock);
}
9270 
9271 /* Must have trace_types_lock held */
trace_array_find(const char * instance)9272 struct trace_array *trace_array_find(const char *instance)
9273 {
9274 	struct trace_array *tr, *found = NULL;
9275 
9276 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9277 		if (tr->name && strcmp(tr->name, instance) == 0) {
9278 			found = tr;
9279 			break;
9280 		}
9281 	}
9282 
9283 	return found;
9284 }
9285 
/*
 * Like trace_array_find(), but takes trace_types_lock itself and bumps
 * the reference count of the array it returns.  The caller must drop
 * the reference with trace_array_put().
 */
struct trace_array *trace_array_find_get(const char *instance)
{
	struct trace_array *tr;

	mutex_lock(&trace_types_lock);
	tr = trace_array_find(instance);
	if (tr)
		tr->ref++;
	mutex_unlock(&trace_types_lock);

	return tr;
}
9298 
/*
 * Create the tracefs directory for instance @tr and populate it with
 * the event tree and standard control files.  Returns 0 or a negative
 * errno; on failure the directory is removed again.
 */
static int trace_array_create_dir(struct trace_array *tr)
{
	int ret;

	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
	if (!tr->dir)
		return -EINVAL;

	ret = event_trace_add_tracer(tr->dir, tr);
	if (ret) {
		tracefs_remove(tr->dir);
		return ret;
	}

	init_tracer_tracefs(tr, tr->dir);
	__update_tracer_options(tr);

	return ret;
}
9318 
/*
 * Allocate and initialize a new trace array (instance) named @name.
 * Caller must hold trace_types_lock.  On success the array is linked
 * on ftrace_trace_arrays with one reference held; on failure all
 * partial state is unwound and an ERR_PTR is returned.
 */
static struct trace_array *trace_array_create(const char *name)
{
	struct trace_array *tr;
	int ret;

	ret = -ENOMEM;
	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
	if (!tr)
		return ERR_PTR(ret);

	tr->name = kstrdup(name, GFP_KERNEL);
	if (!tr->name)
		goto out_free_tr;

	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
		goto out_free_tr;

	/* Inherit the top-level flags, minus those that must start clear. */
	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;

	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&tr->start_lock);

	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	/* A fresh instance starts with the no-op tracer. */
	tr->current_trace = &nop_trace;

	INIT_LIST_HEAD(&tr->systems);
	INIT_LIST_HEAD(&tr->events);
	INIT_LIST_HEAD(&tr->hist_vars);
	INIT_LIST_HEAD(&tr->err_log);

	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
		goto out_free_tr;

	if (ftrace_allocate_ftrace_ops(tr) < 0)
		goto out_free_tr;

	ftrace_init_trace_array(tr);

	init_trace_flags_index(tr);

	if (trace_instance_dir) {
		ret = trace_array_create_dir(tr);
		if (ret)
			goto out_free_tr;
	} else
		/* Early boot: tracefs not up yet, defer directory creation. */
		__trace_early_add_events(tr);

	list_add(&tr->list, &ftrace_trace_arrays);

	tr->ref++;

	return tr;

 out_free_tr:
	ftrace_free_ftrace_ops(tr);
	free_trace_buffers(tr);
	free_cpumask_var(tr->tracing_cpumask);
	kfree(tr->name);
	kfree(tr);

	return ERR_PTR(ret);
}
9383 
/*
 * tracefs mkdir callback for instances/: create a new trace array,
 * rejecting duplicate names with -EEXIST.
 */
static int instance_mkdir(const char *name)
{
	struct trace_array *tr;
	int ret;

	/* Lock order: event_mutex before trace_types_lock. */
	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);

	ret = -EEXIST;
	if (trace_array_find(name))
		goto out_unlock;

	tr = trace_array_create(name);

	ret = PTR_ERR_OR_ZERO(tr);

out_unlock:
	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);
	return ret;
}
9405 
9406 /**
9407  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9408  * @name: The name of the trace array to be looked up/created.
9409  *
9410  * Returns pointer to trace array with given name.
9411  * NULL, if it cannot be created.
9412  *
9413  * NOTE: This function increments the reference counter associated with the
9414  * trace array returned. This makes sure it cannot be freed while in use.
9415  * Use trace_array_put() once the trace array is no longer needed.
9416  * If the trace_array is to be freed, trace_array_destroy() needs to
9417  * be called after the trace_array_put(), or simply let user space delete
9418  * it from the tracefs instances directory. But until the
9419  * trace_array_put() is called, user space can not delete it.
9420  *
9421  */
struct trace_array *trace_array_get_by_name(const char *name)
{
	struct trace_array *tr;

	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);

	/* Reuse an existing instance with this name if there is one. */
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr->name && strcmp(tr->name, name) == 0)
			goto out_unlock;
	}

	tr = trace_array_create(name);

	if (IS_ERR(tr))
		tr = NULL;
out_unlock:
	/* Take the reference promised by the kerneldoc above. */
	if (tr)
		tr->ref++;

	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);
	return tr;
}
EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9447 
/*
 * Tear down one instance.  Caller holds event_mutex and
 * trace_types_lock.  Returns -EBUSY while the instance still has
 * users (extra references or attached tracing contexts).
 */
static int __remove_instance(struct trace_array *tr)
{
	int i;

	/* Reference counter for a newly created trace array = 1. */
	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
		return -EBUSY;

	list_del(&tr->list);

	/* Disable all the flags that were enabled coming in */
	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
		if ((1 << i) & ZEROED_TRACE_FLAGS)
			set_tracer_flag(tr, 1 << i, 0);
	}

	/* Detach tracers, probes and events before freeing anything. */
	tracing_set_nop(tr);
	clear_ftrace_function_probes(tr);
	event_trace_del_tracer(tr);
	ftrace_clear_pids(tr);
	ftrace_destroy_function_files(tr);
	tracefs_remove(tr->dir);
	free_percpu(tr->last_func_repeats);
	free_trace_buffers(tr);
	clear_tracing_err_log(tr);

	/* Free the per-tracer option dentry arrays (see create_trace_option_files). */
	for (i = 0; i < tr->nr_topts; i++) {
		kfree(tr->topts[i].topts);
	}
	kfree(tr->topts);

	free_cpumask_var(tr->tracing_cpumask);
	kfree(tr->name);
	kfree(tr);

	return 0;
}
9485 
/*
 * Destroy an instance previously obtained from trace_array_get_by_name()
 * (after its reference was dropped).  Verifies the pointer is still a
 * live instance before removing it; returns 0, -EINVAL, -ENODEV or
 * -EBUSY from __remove_instance().
 */
int trace_array_destroy(struct trace_array *this_tr)
{
	struct trace_array *tr;
	int ret;

	if (!this_tr)
		return -EINVAL;

	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);

	ret = -ENODEV;

	/* Making sure trace array exists before destroying it. */
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr == this_tr) {
			ret = __remove_instance(tr);
			break;
		}
	}

	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(trace_array_destroy);
9513 
/*
 * tracefs rmdir callback for instances/: remove the named instance.
 * Returns -ENODEV if no such instance, or -EBUSY while it is in use.
 */
static int instance_rmdir(const char *name)
{
	struct trace_array *tr;
	int ret;

	/* Lock order: event_mutex before trace_types_lock. */
	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);

	ret = -ENODEV;
	tr = trace_array_find(name);
	if (tr)
		ret = __remove_instance(tr);

	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	return ret;
}
9532 
/*
 * Create the "instances" directory and retroactively create tracefs
 * directories for any instances that were created before tracefs was
 * ready (e.g. from the boot command line).
 */
static __init void create_trace_instances(struct dentry *d_tracer)
{
	struct trace_array *tr;

	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
							 instance_mkdir,
							 instance_rmdir);
	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
		return;

	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		/* The top-level array (name == NULL) has its own directory. */
		if (!tr->name)
			continue;
		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
			     "Failed to create instance directory\n"))
			break;
	}

	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);
}
9557 
/*
 * Populate @d_tracer with the standard control files for trace array
 * @tr.  Used both for the top-level tracing directory (d_tracer ==
 * NULL) and for each instance directory.
 */
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
	struct trace_event_file *file;
	int cpu;

	trace_create_file("available_tracers", 0444, d_tracer,
			tr, &show_traces_fops);

	trace_create_file("current_tracer", 0644, d_tracer,
			tr, &set_tracer_fops);

	trace_create_file("tracing_cpumask", 0644, d_tracer,
			  tr, &tracing_cpumask_fops);

	trace_create_file("trace_options", 0644, d_tracer,
			  tr, &tracing_iter_fops);

	trace_create_file("trace", 0644, d_tracer,
			  tr, &tracing_fops);

	trace_create_file("trace_pipe", 0444, d_tracer,
			  tr, &tracing_pipe_fops);

	trace_create_file("buffer_size_kb", 0644, d_tracer,
			  tr, &tracing_entries_fops);

	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
			  tr, &tracing_total_entries_fops);

	trace_create_file("free_buffer", 0200, d_tracer,
			  tr, &tracing_free_buffer_fops);

	trace_create_file("trace_marker", 0220, d_tracer,
			  tr, &tracing_mark_fops);

	/* Give the ftrace:print event a trigger file if it exists. */
	file = __find_event_file(tr, "ftrace", "print");
	if (file && file->dir)
		trace_create_file("trigger", 0644, file->dir, file,
				  &event_trigger_fops);
	tr->trace_marker_file = file;

	trace_create_file("trace_marker_raw", 0220, d_tracer,
			  tr, &tracing_mark_raw_fops);

	trace_create_file("trace_clock", 0644, d_tracer, tr,
			  &trace_clock_fops);

	trace_create_file("tracing_on", 0644, d_tracer,
			  tr, &rb_simple_fops);

	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
			  &trace_time_stamp_mode_fops);

	/* Default: wake readers when the buffer is half full. */
	tr->buffer_percent = 50;

	trace_create_file("buffer_percent", 0640, d_tracer,
			tr, &buffer_percent_fops);

	create_trace_options_dir(tr);

#ifdef CONFIG_TRACER_MAX_TRACE
	trace_create_maxlat_file(tr, d_tracer);
#endif

	if (ftrace_create_function_files(tr, d_tracer))
		MEM_FAIL(1, "Could not allocate function filter files");

#ifdef CONFIG_TRACER_SNAPSHOT
	trace_create_file("snapshot", 0644, d_tracer,
			  tr, &snapshot_fops);
#endif

	trace_create_file("error_log", 0644, d_tracer,
			  tr, &tracing_err_log_fops);

	for_each_tracing_cpu(cpu)
		tracing_init_tracefs_percpu(tr, cpu);

	ftrace_init_tracefs(tr, d_tracer);
}
9639 
trace_automount(struct dentry * mntpt,void * ingore)9640 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9641 {
9642 	struct vfsmount *mnt;
9643 	struct file_system_type *type;
9644 
9645 	/*
9646 	 * To maintain backward compatibility for tools that mount
9647 	 * debugfs to get to the tracing facility, tracefs is automatically
9648 	 * mounted to the debugfs/tracing directory.
9649 	 */
9650 	type = get_fs_type("tracefs");
9651 	if (!type)
9652 		return NULL;
9653 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9654 	put_filesystem(type);
9655 	if (IS_ERR(mnt))
9656 		return NULL;
9657 	mntget(mnt);
9658 
9659 	return mnt;
9660 }
9661 
9662 /**
9663  * tracing_init_dentry - initialize top level trace array
9664  *
9665  * This is called when creating files or directories in the tracing
9666  * directory. It is called via fs_initcall() by any of the boot up code
9667  * and expects to return the dentry of the top level tracing directory.
9668  */
int tracing_init_dentry(void)
{
	struct trace_array *tr = &global_trace;

	/* Honor kernel lockdown: no tracefs access at all. */
	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/* The top level trace array uses  NULL as parent */
	if (tr->dir)
		return 0;

	if (WARN_ON(!tracefs_initialized()))
		return -ENODEV;

	/*
	 * As there may still be users that expect the tracing
	 * files to exist in debugfs/tracing, we must automount
	 * the tracefs file system there, so older tools still
	 * work with the newer kernel.
	 */
	tr->dir = debugfs_create_automount("tracing", NULL,
					   trace_automount, NULL);

	return 0;
}
9696 
9697 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9698 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9699 
9700 static struct workqueue_struct *eval_map_wq __initdata;
9701 static struct work_struct eval_map_work __initdata;
9702 
eval_map_work_func(struct work_struct * work)9703 static void __init eval_map_work_func(struct work_struct *work)
9704 {
9705 	int len;
9706 
9707 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9708 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9709 }
9710 
/*
 * Kick off registration of the built-in eval (enum) maps on a
 * workqueue so boot isn't stalled; falls back to doing the work
 * synchronously if the workqueue cannot be allocated.
 */
static int __init trace_eval_init(void)
{
	INIT_WORK(&eval_map_work, eval_map_work_func);

	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
	if (!eval_map_wq) {
		pr_err("Unable to allocate eval_map_wq\n");
		/* Do work here */
		eval_map_work_func(&eval_map_work);
		return -ENOMEM;
	}

	queue_work(eval_map_wq, &eval_map_work);
	return 0;
}
9726 
/*
 * Late initcall: flush and destroy the eval-map workqueue so the
 * __init work function cannot run after init memory is freed.
 */
static int __init trace_eval_sync(void)
{
	/* Make sure the eval map updates are finished */
	if (eval_map_wq)
		destroy_workqueue(eval_map_wq);
	return 0;
}
9734 
9735 late_initcall_sync(trace_eval_sync);
9736 
9737 
9738 #ifdef CONFIG_MODULES
trace_module_add_evals(struct module * mod)9739 static void trace_module_add_evals(struct module *mod)
9740 {
9741 	if (!mod->num_trace_evals)
9742 		return;
9743 
9744 	/*
9745 	 * Modules with bad taint do not have events created, do
9746 	 * not bother with enums either.
9747 	 */
9748 	if (trace_module_has_bad_taint(mod))
9749 		return;
9750 
9751 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9752 }
9753 
9754 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * Unlink and free the eval-map block that @mod contributed.  The list
 * mixes map heads and tail markers; @last tracks the pointer that must
 * be patched so the module's block can be spliced out in one store.
 */
static void trace_module_remove_evals(struct module *mod)
{
	union trace_eval_map_item *map;
	union trace_eval_map_item **last = &trace_eval_maps;

	if (!mod->num_trace_evals)
		return;

	mutex_lock(&trace_eval_mutex);

	map = trace_eval_maps;

	/* Walk block by block looking for the one owned by @mod. */
	while (map) {
		if (map->head.mod == mod)
			break;
		map = trace_eval_jmp_to_tail(map);
		last = &map->tail.next;
		map = map->tail.next;
	}
	if (!map)
		goto out;

	/* Splice the module's block out of the chain, then free it. */
	*last = trace_eval_jmp_to_tail(map)->tail.next;
	kfree(map);
 out:
	mutex_unlock(&trace_eval_mutex);
}
#else
/* Without CONFIG_TRACE_EVAL_MAP_FILE eval maps are never removed: no-op. */
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9785 
/* Module notifier: add eval maps on module load, remove them on unload. */
static int trace_module_notify(struct notifier_block *self,
			       unsigned long val, void *data)
{
	struct module *mod = data;

	switch (val) {
	case MODULE_STATE_COMING:
		trace_module_add_evals(mod);
		break;
	case MODULE_STATE_GOING:
		trace_module_remove_evals(mod);
		break;
	}

	return NOTIFY_OK;
}
9802 
/* Registered in tracer_init_tracefs() to track module load/unload. */
static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};
#endif /* CONFIG_MODULES */
9808 
/*
 * fs_initcall: create the top-level tracing directory, its control
 * files, the instances directory, and hook up module notifications.
 */
static __init int tracer_init_tracefs(void)
{
	int ret;

	trace_access_lock_init();

	ret = tracing_init_dentry();
	if (ret)
		return 0;

	event_trace_init();

	/* NULL parent == the top-level tracing directory. */
	init_tracer_tracefs(&global_trace, NULL);
	ftrace_init_tracefs_toplevel(&global_trace, NULL);

	trace_create_file("tracing_thresh", 0644, NULL,
			&global_trace, &tracing_thresh_fops);

	trace_create_file("README", 0444, NULL,
			NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", 0444, NULL,
			NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", 0644, NULL,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_create_file("saved_tgids", 0444, NULL,
			NULL, &tracing_saved_tgids_fops);

	trace_eval_init();

	trace_create_eval_file(NULL);

#ifdef CONFIG_MODULES
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
			NULL, &tracing_dyn_info_fops);
#endif

	create_trace_instances(NULL);

	update_tracer_options(&global_trace);

	return 0;
}
9858 
9859 fs_initcall(tracer_init_tracefs);
9860 
/*
 * Panic notifier: dump the ftrace ring buffer to the console when
 * ftrace_dump_on_oops is set, unless the Android vendor hook asks us
 * to skip (presumably so the vendor can handle the dump itself --
 * TODO confirm hook contract).
 */
static int trace_panic_handler(struct notifier_block *this,
			       unsigned long event, void *unused)
{
	bool ftrace_check = false;

	trace_android_vh_ftrace_oops_enter(&ftrace_check);

	if (ftrace_check)
		return NOTIFY_OK;

	if (ftrace_dump_on_oops)
		ftrace_dump(ftrace_dump_on_oops);

	trace_android_vh_ftrace_oops_exit(&ftrace_check);
	return NOTIFY_OK;
}
9877 
/* Registered on the panic notifier chain to dump traces on panic. */
static struct notifier_block trace_panic_notifier = {
	.notifier_call  = trace_panic_handler,
	.next           = NULL,
	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
};
9883 
/*
 * Die notifier: same as the panic handler, but only dumps for
 * DIE_OOPS events; other die reasons are ignored.
 */
static int trace_die_handler(struct notifier_block *self,
			     unsigned long val,
			     void *data)
{
	bool ftrace_check = false;

	trace_android_vh_ftrace_oops_enter(&ftrace_check);

	/* Vendor hook may claim the dump; skip ours if so. */
	if (ftrace_check)
		return NOTIFY_OK;

	switch (val) {
	case DIE_OOPS:
		if (ftrace_dump_on_oops)
			ftrace_dump(ftrace_dump_on_oops);
		break;
	default:
		break;
	}

	trace_android_vh_ftrace_oops_exit(&ftrace_check);
	return NOTIFY_OK;
}
9907 
/* Registered on the die notifier chain to dump traces on an oops. */
static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_handler,
	.priority = 200
};
9912 
9913 /*
9914  * printk is set to max of 1024, we really don't need it that big.
9915  * Nothing should be printing 1000 characters anyway.
9916  */
9917 #define TRACE_MAX_PRINT		1000
9918 
9919 /*
9920  * Define here KERN_TRACE so that we have one place to modify
9921  * it if we decide to change what log level the ftrace dump
9922  * should be at.
9923  */
9924 #define KERN_TRACE		KERN_EMERG
9925 
/*
 * Emit one trace_seq worth of dumped trace data via printk, clamping
 * the length defensively, then reset the seq for reuse.
 */
void
trace_printk_seq(struct trace_seq *s)
{
	/* Android vendor hook may suppress the printk of this buffer. */
	bool dump_printk = true;

	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be zero ended, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	trace_android_vh_ftrace_dump_buffer(s, &dump_printk);
	if (dump_printk)
		printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}
9952 
/*
 * trace_init_global_iter - set up @iter to read the global trace buffer
 * @iter: iterator to initialize
 *
 * Prepares a trace_iterator over all CPUs of the global trace array for
 * use by ftrace_dump().  Points the temp/fmt scratch areas at static
 * buffers because, per the comment below, kmalloc() cannot be used for
 * them here.
 */
void trace_init_global_iter(struct trace_iterator *iter)
{
	iter->tr = &global_trace;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->array_buffer = &global_trace.array_buffer;

	/* Give the current tracer a chance to set up iterator state. */
	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/* Can not use kmalloc for iter.temp and iter.fmt */
	iter->temp = static_temp_buf;
	iter->temp_size = STATIC_TEMP_BUF_SIZE;
	iter->fmt = static_fmt_buf;
	iter->fmt_size = STATIC_FMT_BUF_SIZE;
}
9977 
/*
 * ftrace_dump - dump the global ring buffer to the console
 * @oops_dump_mode: DUMP_ALL dumps every CPU's buffer, DUMP_ORIG only the
 *	CPU this is running on, DUMP_NONE skips the dump entirely.
 *
 * Intended for crash paths (panic/oops/sysrq-z — see the notifiers
 * registered in tracer_alloc_buffers()).  Uses a static iterator because
 * it would be too big for the stack, and a static atomic so only one
 * dumper runs at a time.  Tracing is turned off for good; the user can
 * re-enable it with "echo 1 > tracing_on".
 */
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	static atomic_t dump_running;
	struct trace_array *tr = &global_trace;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;
	/* set by the Android vendor hooks to alter/skip parts of the dump */
	bool ftrace_check = false;
	unsigned long size;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracing_off();

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_global_iter(&iter);

	/* Disable per-cpu recording so the buffers stay stable while read. */
	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
		size = ring_buffer_size(iter.array_buffer->buffer, cpu);
		/* vendor hook may veto the dump based on the buffer size */
		trace_android_vh_ftrace_size_check(size, &ftrace_check);
	}

	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	if (ftrace_check)
		goto out_enable;

	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {
		ftrace_check = true;

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		/* vendor hook may request latency-format output */
		trace_android_vh_ftrace_format_check(&ftrace_check);
		if (ftrace_check)
			iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
		/* the dump can take a while; keep the NMI watchdog quiet */
		touch_nmi_watchdog();

		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

	/* Restore the user-symbol flag and re-enable per-cpu recording. */
 out_enable:
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	local_irq_restore(flags);
}
10093 EXPORT_SYMBOL_GPL(ftrace_dump);
10094 
10095 #define WRITE_BUFSIZE  4096
10096 
/*
 * trace_parse_run_command - feed newline-separated commands to a callback
 * @file:     file being written to (unused here; kept for the fops shape)
 * @buffer:   user-space buffer holding the command text
 * @count:    number of bytes to consume from @buffer
 * @ppos:     file offset (not advanced by this helper)
 * @createfn: invoked once per complete command line
 *
 * Copies the user data in chunks of at most WRITE_BUFSIZE-1 bytes,
 * splits each chunk on '\n', strips everything after a '#' (comments),
 * and passes each resulting line to @createfn.  Returns the number of
 * bytes consumed on success, or a negative errno (-ENOMEM, -EFAULT,
 * -EINVAL, or @createfn's error).
 */
ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf, *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		/* leave room for the terminating NUL */
		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size)) {
			ret = -EFAULT;
			goto out;
		}
		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				/*
				 * No newline in the remainder of the chunk.
				 * If more input is still pending, re-read
				 * starting at this partial line — unless
				 * the line alone already fills the whole
				 * buffer, in which case it can never fit.
				 */
				size = strlen(buf);
				if (done + size < count) {
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					ret = -EINVAL;
					goto out;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				goto out;
			buf += size;

		} while (done < count);
	}
	/* all input consumed: report the byte count like a write() would */
	ret = done;

out:
	kfree(kbuf);

	return ret;
}
10161 
/*
 * tracer_alloc_buffers - boot-time initialization of the tracing core
 *
 * Allocates the tracing cpumasks, the global ring buffer (kept at its
 * minimum size unless expanded on the command line), the saved-cmdlines
 * buffer and the temp buffer used by event triggers; then installs the
 * nop tracer, the panic/die notifiers and the global trace array.
 * Returns 0 on success or a negative errno; on failure, everything
 * allocated so far is torn down via the goto-cleanup chain at the end.
 */
__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;


	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callbacks allocates some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	/*
	 * NOTE(review): "preapre" below is a long-standing typo in the
	 * cpuhp state name.  It is only a label, but it is externally
	 * visible (e.g. in debugfs); confirm nothing matches on the
	 * string before correcting it.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:preapre", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_savedcmd;
	}

	if (global_trace.buffer_disabled)
		tracing_off();

	/* Honor a trace_clock= request from the command line, if any. */
	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	/* Dump the ring buffer on panic/oops (handlers defined above). */
	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	test_can_verify();

	return 0;

	/* Unwind in the reverse order of allocation. */
out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}
10292 
early_trace_init(void)10293 void __init early_trace_init(void)
10294 {
10295 	if (tracepoint_printk) {
10296 		tracepoint_print_iter =
10297 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10298 		if (MEM_FAIL(!tracepoint_print_iter,
10299 			     "Failed to allocate trace iterator\n"))
10300 			tracepoint_printk = 0;
10301 		else
10302 			static_key_enable(&tracepoint_printk_key.key);
10303 	}
10304 	tracer_alloc_buffers();
10305 
10306 	init_events();
10307 }
10308 
/*
 * Initialize the trace event subsystem.
 */
void __init trace_init(void)
{
	trace_event_init();
}
10313 
clear_boot_tracer(void)10314 __init static void clear_boot_tracer(void)
10315 {
10316 	/*
10317 	 * The default tracer at boot buffer is an init section.
10318 	 * This function is called in lateinit. If we did not
10319 	 * find the boot tracer, then clear it out, to prevent
10320 	 * later registration from accessing the buffer that is
10321 	 * about to be freed.
10322 	 */
10323 	if (!default_bootup_tracer)
10324 		return;
10325 
10326 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10327 	       default_bootup_tracer);
10328 	default_bootup_tracer = NULL;
10329 }
10330 
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
 * Switch the default trace clock to "global" when the scheduler clock
 * turns out to be unstable.  sched_clock_stable() is only determined
 * by late_initcall time, which is why late_trace_init() calls this.
 */
__init static void tracing_set_default_clock(void)
{
	/* Respect an explicit trace_clock= choice or a stable local clock. */
	if (trace_boot_clock || sched_clock_stable())
		return;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not set tracing clock due to lockdown\n");
		return;
	}

	printk(KERN_WARNING
	       "Unstable clock detected, switching default tracing clock to \"global\"\n"
	       "If you want to keep using the local clock, then add:\n"
	       "  \"trace_clock=local\"\n"
	       "on the kernel command line\n");
	tracing_set_clock(&global_trace, "global");
}
#else
static inline void tracing_set_default_clock(void) { }
#endif
10352 
late_trace_init(void)10353 __init static int late_trace_init(void)
10354 {
10355 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10356 		static_key_disable(&tracepoint_printk_key.key);
10357 		tracepoint_printk = 0;
10358 	}
10359 
10360 	tracing_set_default_clock();
10361 	clear_boot_tracer();
10362 	return 0;
10363 }
10364 
10365 late_initcall_sync(late_trace_init);
10366