1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/kmemleak.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/fsnotify.h>
51 #include <linux/irq_work.h>
52 #include <linux/workqueue.h>
53 #include <trace/hooks/ftrace_dump.h>
54 
55 #include "trace.h"
56 #include "trace_output.h"
57 
58 /*
59  * On boot up, the ring buffer is set to the minimum size, so that
60  * we do not waste memory on systems that are not using tracing.
61  */
62 bool ring_buffer_expanded;
63 
64 /*
65  * We need to change this state when a selftest is running.
66  * A selftest will look into the ring-buffer to count the
67  * entries inserted during the selftest, although concurrent
68  * insertions into the ring-buffer, such as trace_printk(), could occur
69  * at the same time, giving false positive or negative results.
70  */
71 static bool __read_mostly tracing_selftest_running;
72 
73 /*
74  * If boot-time tracing including tracers/events via kernel cmdline
75  * is running, we do not want to run SELFTEST.
76  */
77 bool __read_mostly tracing_selftest_disabled;
78 
79 #ifdef CONFIG_FTRACE_STARTUP_TEST
80 void __init disable_tracing_selftest(const char *reason)
81 {
82 	if (!tracing_selftest_disabled) {
83 		tracing_selftest_disabled = true;
84 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
85 	}
86 }
87 #endif
88 
89 /* Pipe tracepoints to printk */
90 struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
94 
95 /* For tracers that don't implement custom flags */
96 static struct tracer_opt dummy_tracer_opt[] = {
97 	{ }
98 };
99 
100 static int
101 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
102 {
103 	return 0;
104 }
105 
106 /*
107  * To prevent the comm cache from being overwritten when no
108  * tracing is active, only save the comm when a trace event
109  * occurred.
110  */
111 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
112 
113 /*
114  * Kill all tracing for good (never come back).
115  * It is initialized to 1 but will turn to zero if the initialization
116  * of the tracer is successful. But that is the only place that sets
117  * this back to zero.
118  */
119 static int tracing_disabled = 1;
120 
121 cpumask_var_t __read_mostly	tracing_buffer_mask;
122 
123 /*
124  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
125  *
126  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
127  * is set, then ftrace_dump is called. This will output the contents
128  * of the ftrace buffers to the console.  This is very useful for
129  * capturing traces that lead to crashes and outputting them to a
130  * serial console.
131  *
132  * It is off by default, but you can enable it either by specifying
133  * "ftrace_dump_on_oops" on the kernel command line, or by setting
134  * /proc/sys/kernel/ftrace_dump_on_oops.
135  * Set it to 1 to dump the buffers of all CPUs.
136  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
137  */
138 
139 enum ftrace_dump_mode ftrace_dump_on_oops;
140 
141 /* When set, tracing will stop when a WARN*() is hit */
142 int __disable_trace_on_warning;
143 
144 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
145 /* Map of enums to their values, for "eval_map" file */
146 struct trace_eval_map_head {
147 	struct module			*mod;
148 	unsigned long			length;
149 };
150 
151 union trace_eval_map_item;
152 
153 struct trace_eval_map_tail {
154 	/*
155 	 * "end" is first and points to NULL as it must be different
156 	 * than "mod" or "eval_string"
157 	 */
158 	union trace_eval_map_item	*next;
159 	const char			*end;	/* points to NULL */
160 };
161 
162 static DEFINE_MUTEX(trace_eval_mutex);
163 
164 /*
165  * The trace_eval_maps are saved in an array with two extra elements,
166  * one at the beginning, and one at the end. The beginning item contains
167  * the count of the saved maps (head.length), and the module they
168  * belong to if not built in (head.mod). The ending item contains a
169  * pointer to the next array of saved eval_map items.
170  */
171 union trace_eval_map_item {
172 	struct trace_eval_map		map;
173 	struct trace_eval_map_head	head;
174 	struct trace_eval_map_tail	tail;
175 };
176 
177 static union trace_eval_map_item *trace_eval_maps;
178 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
179 
180 int tracing_set_tracer(struct trace_array *tr, const char *buf);
181 static void ftrace_trace_userstack(struct trace_array *tr,
182 				   struct trace_buffer *buffer,
183 				   unsigned int trace_ctx);
184 
185 #define MAX_TRACER_SIZE		100
186 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
187 static char *default_bootup_tracer;
188 
189 static bool allocate_snapshot;
190 
191 static int __init set_cmdline_ftrace(char *str)
192 {
193 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
194 	default_bootup_tracer = bootup_tracer_buf;
195 	/* We are using ftrace early, expand it */
196 	ring_buffer_expanded = true;
197 	return 1;
198 }
199 __setup("ftrace=", set_cmdline_ftrace);
200 
201 static int __init set_ftrace_dump_on_oops(char *str)
202 {
203 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
204 		ftrace_dump_on_oops = DUMP_ALL;
205 		return 1;
206 	}
207 
208 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
209 		ftrace_dump_on_oops = DUMP_ORIG;
210 		return 1;
211 	}
212 
213 	return 0;
214 }
215 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
216 
217 static int __init stop_trace_on_warning(char *str)
218 {
219 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
220 		__disable_trace_on_warning = 1;
221 	return 1;
222 }
223 __setup("traceoff_on_warning", stop_trace_on_warning);
224 
225 static int __init boot_alloc_snapshot(char *str)
226 {
227 	allocate_snapshot = true;
228 	/* We also need the main ring buffer expanded */
229 	ring_buffer_expanded = true;
230 	return 1;
231 }
232 __setup("alloc_snapshot", boot_alloc_snapshot);
233 
234 
235 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
236 
237 static int __init set_trace_boot_options(char *str)
238 {
239 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
240 	return 1;
241 }
242 __setup("trace_options=", set_trace_boot_options);
243 
244 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
245 static char *trace_boot_clock __initdata;
246 
247 static int __init set_trace_boot_clock(char *str)
248 {
249 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
250 	trace_boot_clock = trace_boot_clock_buf;
251 	return 1;
252 }
253 __setup("trace_clock=", set_trace_boot_clock);
254 
255 static int __init set_tracepoint_printk(char *str)
256 {
257 	/* Ignore the "tp_printk_stop_on_boot" param */
258 	if (*str == '_')
259 		return 0;
260 
261 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
262 		tracepoint_printk = 1;
263 	return 1;
264 }
265 __setup("tp_printk", set_tracepoint_printk);
266 
267 static int __init set_tracepoint_printk_stop(char *str)
268 {
269 	tracepoint_printk_stop_on_boot = true;
270 	return 1;
271 }
272 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
273 
274 unsigned long long ns2usecs(u64 nsec)
275 {
276 	nsec += 500;
277 	do_div(nsec, 1000);
278 	return nsec;
279 }
280 
281 static void
282 trace_process_export(struct trace_export *export,
283 	       struct ring_buffer_event *event, int flag)
284 {
285 	struct trace_entry *entry;
286 	unsigned int size = 0;
287 
288 	if (export->flags & flag) {
289 		entry = ring_buffer_event_data(event);
290 		size = ring_buffer_event_length(event);
291 		export->write(export, entry, size);
292 	}
293 }
294 
295 static DEFINE_MUTEX(ftrace_export_lock);
296 
297 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
298 
299 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
300 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
301 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
302 
303 static inline void ftrace_exports_enable(struct trace_export *export)
304 {
305 	if (export->flags & TRACE_EXPORT_FUNCTION)
306 		static_branch_inc(&trace_function_exports_enabled);
307 
308 	if (export->flags & TRACE_EXPORT_EVENT)
309 		static_branch_inc(&trace_event_exports_enabled);
310 
311 	if (export->flags & TRACE_EXPORT_MARKER)
312 		static_branch_inc(&trace_marker_exports_enabled);
313 }
314 
315 static inline void ftrace_exports_disable(struct trace_export *export)
316 {
317 	if (export->flags & TRACE_EXPORT_FUNCTION)
318 		static_branch_dec(&trace_function_exports_enabled);
319 
320 	if (export->flags & TRACE_EXPORT_EVENT)
321 		static_branch_dec(&trace_event_exports_enabled);
322 
323 	if (export->flags & TRACE_EXPORT_MARKER)
324 		static_branch_dec(&trace_marker_exports_enabled);
325 }
326 
327 static void ftrace_exports(struct ring_buffer_event *event, int flag)
328 {
329 	struct trace_export *export;
330 
331 	preempt_disable_notrace();
332 
333 	export = rcu_dereference_raw_check(ftrace_exports_list);
334 	while (export) {
335 		trace_process_export(export, event, flag);
336 		export = rcu_dereference_raw_check(export->next);
337 	}
338 
339 	preempt_enable_notrace();
340 }
341 
342 static inline void
343 add_trace_export(struct trace_export **list, struct trace_export *export)
344 {
345 	rcu_assign_pointer(export->next, *list);
346 	/*
347 	 * We are entering export into the list but another
348 	 * CPU might be walking that list. We need to make sure
349 	 * the export->next pointer is valid before another CPU sees
350 	 * the export pointer included into the list.
351 	 */
352 	rcu_assign_pointer(*list, export);
353 }
354 
355 static inline int
356 rm_trace_export(struct trace_export **list, struct trace_export *export)
357 {
358 	struct trace_export **p;
359 
360 	for (p = list; *p != NULL; p = &(*p)->next)
361 		if (*p == export)
362 			break;
363 
364 	if (*p != export)
365 		return -1;
366 
367 	rcu_assign_pointer(*p, (*p)->next);
368 
369 	return 0;
370 }
371 
372 static inline void
373 add_ftrace_export(struct trace_export **list, struct trace_export *export)
374 {
375 	ftrace_exports_enable(export);
376 
377 	add_trace_export(list, export);
378 }
379 
380 static inline int
381 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
382 {
383 	int ret;
384 
385 	ret = rm_trace_export(list, export);
386 	ftrace_exports_disable(export);
387 
388 	return ret;
389 }
390 
391 int register_ftrace_export(struct trace_export *export)
392 {
393 	if (WARN_ON_ONCE(!export->write))
394 		return -1;
395 
396 	mutex_lock(&ftrace_export_lock);
397 
398 	add_ftrace_export(&ftrace_exports_list, export);
399 
400 	mutex_unlock(&ftrace_export_lock);
401 
402 	return 0;
403 }
404 EXPORT_SYMBOL_GPL(register_ftrace_export);
405 
406 int unregister_ftrace_export(struct trace_export *export)
407 {
408 	int ret;
409 
410 	mutex_lock(&ftrace_export_lock);
411 
412 	ret = rm_ftrace_export(&ftrace_exports_list, export);
413 
414 	mutex_unlock(&ftrace_export_lock);
415 
416 	return ret;
417 }
418 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
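
/*
 * Illustrative sketch (not part of the original file): an external user of
 * the export API above would typically look roughly like this. The names
 * my_write and my_export are hypothetical; see <linux/trace.h> for the exact
 * struct trace_export layout and write() prototype.
 *
 *	static void my_write(struct trace_export *export, const void *entry,
 *			     unsigned int size)
 *	{
 *		copy the raw trace entry somewhere, e.g. out an MMIO port;
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	at init:	register_ftrace_export(&my_export);
 *	at teardown:	unregister_ftrace_export(&my_export);
 */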
419 
420 /* trace_flags holds trace_options default values */
421 #define TRACE_DEFAULT_FLAGS						\
422 	(FUNCTION_DEFAULT_FLAGS |					\
423 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
424 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
425 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
426 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
427 	 TRACE_ITER_HASH_PTR)
428 
429 /* trace_options that are only supported by global_trace */
430 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
431 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
432 
433 /* trace_flags that are default zero for instances */
434 #define ZEROED_TRACE_FLAGS \
435 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
436 
437 /*
438  * The global_trace is the descriptor that holds the top-level tracing
439  * buffers for the live tracing.
440  */
441 static struct trace_array global_trace = {
442 	.trace_flags = TRACE_DEFAULT_FLAGS,
443 };
444 
445 LIST_HEAD(ftrace_trace_arrays);
446 
447 int trace_array_get(struct trace_array *this_tr)
448 {
449 	struct trace_array *tr;
450 	int ret = -ENODEV;
451 
452 	mutex_lock(&trace_types_lock);
453 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
454 		if (tr == this_tr) {
455 			tr->ref++;
456 			ret = 0;
457 			break;
458 		}
459 	}
460 	mutex_unlock(&trace_types_lock);
461 
462 	return ret;
463 }
464 
465 static void __trace_array_put(struct trace_array *this_tr)
466 {
467 	WARN_ON(!this_tr->ref);
468 	this_tr->ref--;
469 }
470 
471 /**
472  * trace_array_put - Decrement the reference counter for this trace array.
473  * @this_tr : pointer to the trace array
474  *
475  * NOTE: Use this when we no longer need the trace array returned by
476  * trace_array_get_by_name(). This ensures the trace array can be later
477  * destroyed.
478  *
479  */
480 void trace_array_put(struct trace_array *this_tr)
481 {
482 	if (!this_tr)
483 		return;
484 
485 	mutex_lock(&trace_types_lock);
486 	__trace_array_put(this_tr);
487 	mutex_unlock(&trace_types_lock);
488 }
489 EXPORT_SYMBOL_GPL(trace_array_put);
490 
491 int tracing_check_open_get_tr(struct trace_array *tr)
492 {
493 	int ret;
494 
495 	ret = security_locked_down(LOCKDOWN_TRACEFS);
496 	if (ret)
497 		return ret;
498 
499 	if (tracing_disabled)
500 		return -ENODEV;
501 
502 	if (tr && trace_array_get(tr) < 0)
503 		return -ENODEV;
504 
505 	return 0;
506 }
507 
508 int call_filter_check_discard(struct trace_event_call *call, void *rec,
509 			      struct trace_buffer *buffer,
510 			      struct ring_buffer_event *event)
511 {
512 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
513 	    !filter_match_preds(call->filter, rec)) {
514 		__trace_event_discard_commit(buffer, event);
515 		return 1;
516 	}
517 
518 	return 0;
519 }
520 
521 /**
522  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
523  * @filtered_pids: The list of pids to check
524  * @search_pid: The PID to find in @filtered_pids
525  *
526  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
527  */
528 bool
529 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
530 {
531 	return trace_pid_list_is_set(filtered_pids, search_pid);
532 }
533 
534 /**
535  * trace_ignore_this_task - should a task be ignored for tracing
536  * @filtered_pids: The list of pids to check
537  * @filtered_no_pids: The list of pids not to be traced
538  * @task: The task that should be ignored if not filtered
539  *
540  * Checks if @task should be traced or not from @filtered_pids.
541  * Returns true if @task should *NOT* be traced.
542  * Returns false if @task should be traced.
543  */
544 bool
545 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
546 		       struct trace_pid_list *filtered_no_pids,
547 		       struct task_struct *task)
548 {
549 	/*
550 	 * If filtered_no_pids is not empty, and the task's pid is listed
551 	 * in filtered_no_pids, then return true.
552 	 * Otherwise, if filtered_pids is empty, that means we can
553 	 * trace all tasks. If it has content, then only trace pids
554 	 * within filtered_pids.
555 	 */
556 
557 	return (filtered_pids &&
558 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
559 		(filtered_no_pids &&
560 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
561 }
562 
563 /**
564  * trace_filter_add_remove_task - Add or remove a task from a pid_list
565  * @pid_list: The list to modify
566  * @self: The current task for fork or NULL for exit
567  * @task: The task to add or remove
568  *
569  * If adding a task, if @self is defined, the task is only added if @self
570  * is also included in @pid_list. This happens on fork and tasks should
571  * only be added when the parent is listed. If @self is NULL, then the
572  * @task pid will be removed from the list, which would happen on exit
573  * of a task.
574  */
575 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
576 				  struct task_struct *self,
577 				  struct task_struct *task)
578 {
579 	if (!pid_list)
580 		return;
581 
582 	/* For forks, we only add if the forking task is listed */
583 	if (self) {
584 		if (!trace_find_filtered_pid(pid_list, self->pid))
585 			return;
586 	}
587 
588 	/* "self" is set for forks, and NULL for exits */
589 	if (self)
590 		trace_pid_list_set(pid_list, task->pid);
591 	else
592 		trace_pid_list_clear(pid_list, task->pid);
593 }
594 
595 /**
596  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
597  * @pid_list: The pid list to show
598  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
599  * @pos: The position of the file
600  *
601  * This is used by the seq_file "next" operation to iterate the pids
602  * listed in a trace_pid_list structure.
603  *
604  * Returns the pid+1 as we want to display pid of zero, but NULL would
605  * stop the iteration.
606  */
607 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
608 {
609 	long pid = (unsigned long)v;
610 	unsigned int next;
611 
612 	(*pos)++;
613 
614 	/* pid already is +1 of the actual previous bit */
615 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
616 		return NULL;
617 
618 	pid = next;
619 
620 	/* Return pid + 1 to allow zero to be represented */
621 	return (void *)(pid + 1);
622 }
623 
624 /**
625  * trace_pid_start - Used for seq_file to start reading pid lists
626  * @pid_list: The pid list to show
627  * @pos: The position of the file
628  *
629  * This is used by seq_file "start" operation to start the iteration
630  * of listing pids.
631  *
632  * Returns the pid+1 as we want to display pid of zero, but NULL would
633  * stop the iteration.
634  */
635 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
636 {
637 	unsigned long pid;
638 	unsigned int first;
639 	loff_t l = 0;
640 
641 	if (trace_pid_list_first(pid_list, &first) < 0)
642 		return NULL;
643 
644 	pid = first;
645 
646 	/* Return pid + 1 so that zero can be the exit value */
647 	for (pid++; pid && l < *pos;
648 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
649 		;
650 	return (void *)pid;
651 }
652 
653 /**
654  * trace_pid_show - show the current pid in seq_file processing
655  * @m: The seq_file structure to write into
656  * @v: A void pointer of the pid (+1) value to display
657  *
658  * Can be directly used by seq_file operations to display the current
659  * pid value.
660  */
661 int trace_pid_show(struct seq_file *m, void *v)
662 {
663 	unsigned long pid = (unsigned long)v - 1;
664 
665 	seq_printf(m, "%lu\n", pid);
666 	return 0;
667 }
668 
669 /* 128 should be much more than enough */
670 #define PID_BUF_SIZE		127
671 
672 int trace_pid_write(struct trace_pid_list *filtered_pids,
673 		    struct trace_pid_list **new_pid_list,
674 		    const char __user *ubuf, size_t cnt)
675 {
676 	struct trace_pid_list *pid_list;
677 	struct trace_parser parser;
678 	unsigned long val;
679 	int nr_pids = 0;
680 	ssize_t read = 0;
681 	ssize_t ret;
682 	loff_t pos;
683 	pid_t pid;
684 
685 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
686 		return -ENOMEM;
687 
688 	/*
689 	 * Always recreate a new array. The write is an all or nothing
690 	 * operation. Always create a new array when adding new pids by
691 	 * the user. If the operation fails, then the current list is
692 	 * not modified.
693 	 */
694 	pid_list = trace_pid_list_alloc();
695 	if (!pid_list) {
696 		trace_parser_put(&parser);
697 		return -ENOMEM;
698 	}
699 
700 	if (filtered_pids) {
701 		/* copy the current bits to the new max */
702 		ret = trace_pid_list_first(filtered_pids, &pid);
703 		while (!ret) {
704 			trace_pid_list_set(pid_list, pid);
705 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
706 			nr_pids++;
707 		}
708 	}
709 
710 	ret = 0;
711 	while (cnt > 0) {
712 
713 		pos = 0;
714 
715 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
716 		if (ret < 0)
717 			break;
718 
719 		read += ret;
720 		ubuf += ret;
721 		cnt -= ret;
722 
723 		if (!trace_parser_loaded(&parser))
724 			break;
725 
726 		ret = -EINVAL;
727 		if (kstrtoul(parser.buffer, 0, &val))
728 			break;
729 
730 		pid = (pid_t)val;
731 
732 		if (trace_pid_list_set(pid_list, pid) < 0) {
733 			ret = -1;
734 			break;
735 		}
736 		nr_pids++;
737 
738 		trace_parser_clear(&parser);
739 		ret = 0;
740 	}
741 	trace_parser_put(&parser);
742 
743 	if (ret < 0) {
744 		trace_pid_list_free(pid_list);
745 		return ret;
746 	}
747 
748 	if (!nr_pids) {
749 		/* Cleared the list of pids */
750 		trace_pid_list_free(pid_list);
751 		pid_list = NULL;
752 	}
753 
754 	*new_pid_list = pid_list;
755 
756 	return read;
757 }
758 
759 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
760 {
761 	u64 ts;
762 
763 	/* Early boot up does not have a buffer yet */
764 	if (!buf->buffer)
765 		return trace_clock_local();
766 
767 	ts = ring_buffer_time_stamp(buf->buffer);
768 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
769 
770 	return ts;
771 }
772 
773 u64 ftrace_now(int cpu)
774 {
775 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
776 }
777 
778 /**
779  * tracing_is_enabled - Show if global_trace has been enabled
780  *
781  * Shows if the global trace has been enabled or not. It uses the
782  * mirror flag "buffer_disabled" to be used in fast paths such as for
783  * the irqsoff tracer. But it may be inaccurate due to races. If you
784  * need to know the accurate state, use tracing_is_on() which is a little
785  * slower, but accurate.
786  */
787 int tracing_is_enabled(void)
788 {
789 	/*
790 	 * For quick access (irqsoff uses this in fast path), just
791 	 * return the mirror variable of the state of the ring buffer.
792 	 * It's a little racy, but we don't really care.
793 	 */
794 	smp_rmb();
795 	return !global_trace.buffer_disabled;
796 }
797 
798 /*
799  * trace_buf_size is the size in bytes that is allocated
800  * for a buffer. Note, the number of bytes is always rounded
801  * to page size.
802  *
803  * This number is purposely set to a low number of 16384.
804  * If a dump on oops happens, it is much appreciated not to have to
805  * wait for all that output. In any case, this is configurable at
806  * both boot time and run time.
807  */
808 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
809 
810 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
811 
812 /* trace_types holds a link list of available tracers. */
813 static struct tracer		*trace_types __read_mostly;
814 
815 /*
816  * trace_types_lock is used to protect the trace_types list.
817  */
818 DEFINE_MUTEX(trace_types_lock);
819 
820 /*
821  * Serialize access to the ring buffer.
822  *
823  * The ring buffer serializes readers, but that is only low-level protection.
824  * The validity of events (as returned by ring_buffer_peek() etc.)
825  * is not protected by the ring buffer.
826  *
827  * The content of events may become garbage if we allow other processes to
828  * consume these events concurrently:
829  *   A) the page of the consumed events may become a normal page
830  *      (not a reader page) in the ring buffer, and this page will be
831  *      rewritten by the event producer.
832  *   B) the page of the consumed events may become a page for splice_read,
833  *      and this page will be returned to the system.
834  *
835  * These primitives allow multiple processes to access different CPU ring
836  * buffers concurrently.
837  *
838  * These primitives don't distinguish read-only and read-consume access.
839  * Multiple read-only accesses are also serialized.
840  */
841 
842 #ifdef CONFIG_SMP
843 static DECLARE_RWSEM(all_cpu_access_lock);
844 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
845 
846 static inline void trace_access_lock(int cpu)
847 {
848 	if (cpu == RING_BUFFER_ALL_CPUS) {
849 		/* gain it for accessing the whole ring buffer. */
850 		down_write(&all_cpu_access_lock);
851 	} else {
852 		/* gain it for accessing a cpu ring buffer. */
853 
854 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
855 		down_read(&all_cpu_access_lock);
856 
857 		/* Secondly block other access to this @cpu ring buffer. */
858 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
859 	}
860 }
861 
862 static inline void trace_access_unlock(int cpu)
863 {
864 	if (cpu == RING_BUFFER_ALL_CPUS) {
865 		up_write(&all_cpu_access_lock);
866 	} else {
867 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
868 		up_read(&all_cpu_access_lock);
869 	}
870 }
871 
872 static inline void trace_access_lock_init(void)
873 {
874 	int cpu;
875 
876 	for_each_possible_cpu(cpu)
877 		mutex_init(&per_cpu(cpu_access_lock, cpu));
878 }
879 
880 #else
881 
882 static DEFINE_MUTEX(access_lock);
883 
884 static inline void trace_access_lock(int cpu)
885 {
886 	(void)cpu;
887 	mutex_lock(&access_lock);
888 }
889 
890 static inline void trace_access_unlock(int cpu)
891 {
892 	(void)cpu;
893 	mutex_unlock(&access_lock);
894 }
895 
896 static inline void trace_access_lock_init(void)
897 {
898 }
899 
900 #endif
901 
902 #ifdef CONFIG_STACKTRACE
903 static void __ftrace_trace_stack(struct trace_buffer *buffer,
904 				 unsigned int trace_ctx,
905 				 int skip, struct pt_regs *regs);
906 static inline void ftrace_trace_stack(struct trace_array *tr,
907 				      struct trace_buffer *buffer,
908 				      unsigned int trace_ctx,
909 				      int skip, struct pt_regs *regs);
910 
911 #else
912 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
913 					unsigned int trace_ctx,
914 					int skip, struct pt_regs *regs)
915 {
916 }
917 static inline void ftrace_trace_stack(struct trace_array *tr,
918 				      struct trace_buffer *buffer,
919 				      unsigned long trace_ctx,
920 				      int skip, struct pt_regs *regs)
921 {
922 }
923 
924 #endif
925 
926 static __always_inline void
927 trace_event_setup(struct ring_buffer_event *event,
928 		  int type, unsigned int trace_ctx)
929 {
930 	struct trace_entry *ent = ring_buffer_event_data(event);
931 
932 	tracing_generic_entry_update(ent, type, trace_ctx);
933 }
934 
935 static __always_inline struct ring_buffer_event *
936 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
937 			  int type,
938 			  unsigned long len,
939 			  unsigned int trace_ctx)
940 {
941 	struct ring_buffer_event *event;
942 
943 	event = ring_buffer_lock_reserve(buffer, len);
944 	if (event != NULL)
945 		trace_event_setup(event, type, trace_ctx);
946 
947 	return event;
948 }
949 
950 void tracer_tracing_on(struct trace_array *tr)
951 {
952 	if (tr->array_buffer.buffer)
953 		ring_buffer_record_on(tr->array_buffer.buffer);
954 	/*
955 	 * This flag is looked at when buffers haven't been allocated
956 	 * yet, or by some tracers (like irqsoff), that just want to
957 	 * know if the ring buffer has been disabled, but it can handle
958 	 * races of where it gets disabled but we still do a record.
959 	 * As the check is in the fast path of the tracers, it is more
960 	 * important to be fast than accurate.
961 	 */
962 	tr->buffer_disabled = 0;
963 	/* Make the flag seen by readers */
964 	smp_wmb();
965 }
966 
967 /**
968  * tracing_on - enable tracing buffers
969  *
970  * This function enables tracing buffers that may have been
971  * disabled with tracing_off.
972  */
973 void tracing_on(void)
974 {
975 	tracer_tracing_on(&global_trace);
976 }
977 EXPORT_SYMBOL_GPL(tracing_on);
978 
979 
980 static __always_inline void
981 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
982 {
983 	__this_cpu_write(trace_taskinfo_save, true);
984 
985 	/* If this is the temp buffer, we need to commit fully */
986 	if (this_cpu_read(trace_buffered_event) == event) {
987 		/* Length is in event->array[0] */
988 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
989 		/* Release the temp buffer */
990 		this_cpu_dec(trace_buffered_event_cnt);
991 	} else
992 		ring_buffer_unlock_commit(buffer, event);
993 }
994 
995 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
996 		       const char *str, int size)
997 {
998 	struct ring_buffer_event *event;
999 	struct trace_buffer *buffer;
1000 	struct print_entry *entry;
1001 	unsigned int trace_ctx;
1002 	int alloc;
1003 
1004 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1005 		return 0;
1006 
1007 	if (unlikely(tracing_selftest_running || tracing_disabled))
1008 		return 0;
1009 
1010 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1011 
1012 	trace_ctx = tracing_gen_ctx();
1013 	buffer = tr->array_buffer.buffer;
1014 	ring_buffer_nest_start(buffer);
1015 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1016 					    trace_ctx);
1017 	if (!event) {
1018 		size = 0;
1019 		goto out;
1020 	}
1021 
1022 	entry = ring_buffer_event_data(event);
1023 	entry->ip = ip;
1024 
1025 	memcpy(&entry->buf, str, size);
1026 
1027 	/* Add a newline if necessary */
1028 	if (entry->buf[size - 1] != '\n') {
1029 		entry->buf[size] = '\n';
1030 		entry->buf[size + 1] = '\0';
1031 	} else
1032 		entry->buf[size] = '\0';
1033 
1034 	__buffer_unlock_commit(buffer, event);
1035 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1036  out:
1037 	ring_buffer_nest_end(buffer);
1038 	return size;
1039 }
1040 EXPORT_SYMBOL_GPL(__trace_array_puts);
1041 
1042 /**
1043  * __trace_puts - write a constant string into the trace buffer.
1044  * @ip:	   The address of the caller
1045  * @str:   The constant string to write
1046  * @size:  The size of the string.
1047  */
1048 int __trace_puts(unsigned long ip, const char *str, int size)
1049 {
1050 	return __trace_array_puts(&global_trace, ip, str, size);
1051 }
1052 EXPORT_SYMBOL_GPL(__trace_puts);
1053 
1054 /**
1055  * __trace_bputs - write the pointer to a constant string into trace buffer
1056  * @ip:	   The address of the caller
1057  * @str:   The constant string whose pointer is written into the buffer
1058  */
1059 int __trace_bputs(unsigned long ip, const char *str)
1060 {
1061 	struct ring_buffer_event *event;
1062 	struct trace_buffer *buffer;
1063 	struct bputs_entry *entry;
1064 	unsigned int trace_ctx;
1065 	int size = sizeof(struct bputs_entry);
1066 	int ret = 0;
1067 
1068 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1069 		return 0;
1070 
1071 	if (unlikely(tracing_selftest_running || tracing_disabled))
1072 		return 0;
1073 
1074 	trace_ctx = tracing_gen_ctx();
1075 	buffer = global_trace.array_buffer.buffer;
1076 
1077 	ring_buffer_nest_start(buffer);
1078 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1079 					    trace_ctx);
1080 	if (!event)
1081 		goto out;
1082 
1083 	entry = ring_buffer_event_data(event);
1084 	entry->ip			= ip;
1085 	entry->str			= str;
1086 
1087 	__buffer_unlock_commit(buffer, event);
1088 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1089 
1090 	ret = 1;
1091  out:
1092 	ring_buffer_nest_end(buffer);
1093 	return ret;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_bputs);
1096 
1097 #ifdef CONFIG_TRACER_SNAPSHOT
1098 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1099 					   void *cond_data)
1100 {
1101 	struct tracer *tracer = tr->current_trace;
1102 	unsigned long flags;
1103 
1104 	if (in_nmi()) {
1105 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1106 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1107 		return;
1108 	}
1109 
1110 	if (!tr->allocated_snapshot) {
1111 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1112 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1113 		tracer_tracing_off(tr);
1114 		return;
1115 	}
1116 
1117 	/* Note, snapshot can not be used when the tracer uses it */
1118 	if (tracer->use_max_tr) {
1119 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1120 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1121 		return;
1122 	}
1123 
1124 	local_irq_save(flags);
1125 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1126 	local_irq_restore(flags);
1127 }
1128 
1129 void tracing_snapshot_instance(struct trace_array *tr)
1130 {
1131 	tracing_snapshot_instance_cond(tr, NULL);
1132 }
1133 
1134 /**
1135  * tracing_snapshot - take a snapshot of the current buffer.
1136  *
1137  * This causes a swap between the snapshot buffer and the current live
1138  * tracing buffer. You can use this to take snapshots of the live
1139  * trace when some condition is triggered, but continue to trace.
1140  *
1141  * Note, make sure to allocate the snapshot with either
1142  * a tracing_snapshot_alloc(), or by doing it manually
1143  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1144  *
1145  * If the snapshot buffer is not allocated, it will stop tracing.
1146  * Basically making a permanent snapshot.
1147  */
1148 void tracing_snapshot(void)
1149 {
1150 	struct trace_array *tr = &global_trace;
1151 
1152 	tracing_snapshot_instance(tr);
1153 }
1154 EXPORT_SYMBOL_GPL(tracing_snapshot);
1155 
1156 /**
1157  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1158  * @tr:		The tracing instance to snapshot
1159  * @cond_data:	The data to be tested conditionally, and possibly saved
1160  *
1161  * This is the same as tracing_snapshot() except that the snapshot is
1162  * conditional - the snapshot will only happen if the
1163  * cond_snapshot.update() implementation receiving the cond_data
1164  * returns true, which means that the trace array's cond_snapshot
1165  * update() operation used the cond_data to determine whether the
1166  * snapshot should be taken, and if it was, presumably saved it along
1167  * with the snapshot.
1168  */
1169 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1170 {
1171 	tracing_snapshot_instance_cond(tr, cond_data);
1172 }
1173 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1174 
1175 /**
1176  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1177  * @tr:		The tracing instance
1178  *
1179  * When the user enables a conditional snapshot using
1180  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1181  * with the snapshot.  This accessor is used to retrieve it.
1182  *
1183  * Should not be called from cond_snapshot.update(), since it takes
1184  * the tr->max_lock lock, which the code calling
1185  * cond_snapshot.update() has already done.
1186  *
1187  * Returns the cond_data associated with the trace array's snapshot.
1188  */
1189 void *tracing_cond_snapshot_data(struct trace_array *tr)
1190 {
1191 	void *cond_data = NULL;
1192 
1193 	local_irq_disable();
1194 	arch_spin_lock(&tr->max_lock);
1195 
1196 	if (tr->cond_snapshot)
1197 		cond_data = tr->cond_snapshot->cond_data;
1198 
1199 	arch_spin_unlock(&tr->max_lock);
1200 	local_irq_enable();
1201 
1202 	return cond_data;
1203 }
1204 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1205 
1206 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1207 					struct array_buffer *size_buf, int cpu_id);
1208 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1209 
1210 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1211 {
1212 	int ret;
1213 
1214 	if (!tr->allocated_snapshot) {
1215 
1216 		/* allocate spare buffer */
1217 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1218 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1219 		if (ret < 0)
1220 			return ret;
1221 
1222 		tr->allocated_snapshot = true;
1223 	}
1224 
1225 	return 0;
1226 }
1227 
1228 static void free_snapshot(struct trace_array *tr)
1229 {
1230 	/*
1231 	 * We don't free the ring buffer; instead, we resize it because
1232 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1233 	 * we want to preserve it.
1234 	 */
1235 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1236 	set_buffer_entries(&tr->max_buffer, 1);
1237 	tracing_reset_online_cpus(&tr->max_buffer);
1238 	tr->allocated_snapshot = false;
1239 }
1240 
1241 /**
1242  * tracing_alloc_snapshot - allocate snapshot buffer.
1243  *
1244  * This only allocates the snapshot buffer if it isn't already
1245  * allocated - it doesn't also take a snapshot.
1246  *
1247  * This is meant to be used in cases where the snapshot buffer needs
1248  * to be set up for events that can't sleep but need to be able to
1249  * trigger a snapshot.
1250  */
1251 int tracing_alloc_snapshot(void)
1252 {
1253 	struct trace_array *tr = &global_trace;
1254 	int ret;
1255 
1256 	ret = tracing_alloc_snapshot_instance(tr);
1257 	WARN_ON(ret < 0);
1258 
1259 	return ret;
1260 }
1261 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1262 
1263 /**
1264  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1265  *
1266  * This is similar to tracing_snapshot(), but it will allocate the
1267  * snapshot buffer if it isn't already allocated. Use this only
1268  * where it is safe to sleep, as the allocation may sleep.
1269  *
1270  * This causes a swap between the snapshot buffer and the current live
1271  * tracing buffer. You can use this to take snapshots of the live
1272  * trace when some condition is triggered, but continue to trace.
1273  */
1274 void tracing_snapshot_alloc(void)
1275 {
1276 	int ret;
1277 
1278 	ret = tracing_alloc_snapshot();
1279 	if (ret < 0)
1280 		return;
1281 
1282 	tracing_snapshot();
1283 }
1284 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
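
/*
 * Illustrative sketch (not part of the original file): a typical in-kernel
 * user of the snapshot API above, called from a context that may sleep:
 *
 *	tracing_snapshot_alloc();	allocates the spare buffer (if needed)
 *					and takes a snapshot
 *
 * If the snapshot buffer was already allocated, for example via
 * "echo 1 > /sys/kernel/debug/tracing/snapshot", then a plain
 *
 *	tracing_snapshot();		swaps the live buffer with the snapshot
 *
 * is sufficient and is safe from non-sleeping contexts as well.
 */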
1285 
1286 /**
1287  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1288  * @tr:		The tracing instance
1289  * @cond_data:	User data to associate with the snapshot
1290  * @update:	Implementation of the cond_snapshot update function
1291  *
1292  * Check whether the conditional snapshot for the given instance has
1293  * already been enabled, or if the current tracer is already using a
1294  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1295  * save the cond_data and update function inside.
1296  *
1297  * Returns 0 if successful, error otherwise.
1298  */
1299 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1300 				 cond_update_fn_t update)
1301 {
1302 	struct cond_snapshot *cond_snapshot;
1303 	int ret = 0;
1304 
1305 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1306 	if (!cond_snapshot)
1307 		return -ENOMEM;
1308 
1309 	cond_snapshot->cond_data = cond_data;
1310 	cond_snapshot->update = update;
1311 
1312 	mutex_lock(&trace_types_lock);
1313 
1314 	ret = tracing_alloc_snapshot_instance(tr);
1315 	if (ret)
1316 		goto fail_unlock;
1317 
1318 	if (tr->current_trace->use_max_tr) {
1319 		ret = -EBUSY;
1320 		goto fail_unlock;
1321 	}
1322 
1323 	/*
1324 	 * The cond_snapshot can only change to NULL without the
1325 	 * trace_types_lock. We don't care if we race with it going
1326 	 * to NULL, but we want to make sure that it's not set to
1327 	 * something other than NULL when we get here, which we can
1328 	 * do safely with only holding the trace_types_lock and not
1329 	 * having to take the max_lock.
1330 	 */
1331 	if (tr->cond_snapshot) {
1332 		ret = -EBUSY;
1333 		goto fail_unlock;
1334 	}
1335 
1336 	local_irq_disable();
1337 	arch_spin_lock(&tr->max_lock);
1338 	tr->cond_snapshot = cond_snapshot;
1339 	arch_spin_unlock(&tr->max_lock);
1340 	local_irq_enable();
1341 
1342 	mutex_unlock(&trace_types_lock);
1343 
1344 	return ret;
1345 
1346  fail_unlock:
1347 	mutex_unlock(&trace_types_lock);
1348 	kfree(cond_snapshot);
1349 	return ret;
1350 }
1351 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1352 
1353 /**
1354  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1355  * @tr:		The tracing instance
1356  *
1357  * Check whether the conditional snapshot for the given instance is
1358  * enabled; if so, free the cond_snapshot associated with it,
1359  * otherwise return -EINVAL.
1360  *
1361  * Returns 0 if successful, error otherwise.
1362  */
1363 int tracing_snapshot_cond_disable(struct trace_array *tr)
1364 {
1365 	int ret = 0;
1366 
1367 	local_irq_disable();
1368 	arch_spin_lock(&tr->max_lock);
1369 
1370 	if (!tr->cond_snapshot)
1371 		ret = -EINVAL;
1372 	else {
1373 		kfree(tr->cond_snapshot);
1374 		tr->cond_snapshot = NULL;
1375 	}
1376 
1377 	arch_spin_unlock(&tr->max_lock);
1378 	local_irq_enable();
1379 
1380 	return ret;
1381 }
1382 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1383 #else
1384 void tracing_snapshot(void)
1385 {
1386 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1387 }
1388 EXPORT_SYMBOL_GPL(tracing_snapshot);
1389 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1390 {
1391 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1392 }
1393 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1394 int tracing_alloc_snapshot(void)
1395 {
1396 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1397 	return -ENODEV;
1398 }
1399 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1400 void tracing_snapshot_alloc(void)
1401 {
1402 	/* Give warning */
1403 	tracing_snapshot();
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1406 void *tracing_cond_snapshot_data(struct trace_array *tr)
1407 {
1408 	return NULL;
1409 }
1410 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1411 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1412 {
1413 	return -ENODEV;
1414 }
1415 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1416 int tracing_snapshot_cond_disable(struct trace_array *tr)
1417 {
1418 	return false;
1419 }
1420 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1421 #define free_snapshot(tr)	do { } while (0)
1422 #endif /* CONFIG_TRACER_SNAPSHOT */
1423 
1424 void tracer_tracing_off(struct trace_array *tr)
1425 {
1426 	if (tr->array_buffer.buffer)
1427 		ring_buffer_record_off(tr->array_buffer.buffer);
1428 	/*
1429 	 * This flag is looked at when buffers haven't been allocated
1430 	 * yet, or by some tracers (like irqsoff), that just want to
1431 	 * know if the ring buffer has been disabled, but it can handle
1432 	 * races of where it gets disabled but we still do a record.
1433 	 * As the check is in the fast path of the tracers, it is more
1434 	 * important to be fast than accurate.
1435 	 */
1436 	tr->buffer_disabled = 1;
1437 	/* Make the flag seen by readers */
1438 	smp_wmb();
1439 }
1440 
1441 /**
1442  * tracing_off - turn off tracing buffers
1443  *
1444  * This function stops the tracing buffers from recording data.
1445  * It does not disable any overhead the tracers themselves may
1446  * be causing. This function simply causes all recording to
1447  * the ring buffers to fail.
1448  */
1449 void tracing_off(void)
1450 {
1451 	tracer_tracing_off(&global_trace);
1452 }
1453 EXPORT_SYMBOL_GPL(tracing_off);
1454 
1455 void disable_trace_on_warning(void)
1456 {
1457 	if (__disable_trace_on_warning) {
1458 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1459 			"Disabling tracing due to warning\n");
1460 		tracing_off();
1461 	}
1462 }
1463 
1464 /**
1465  * tracer_tracing_is_on - show real state of ring buffer enabled
1466  * @tr : the trace array to know if ring buffer is enabled
1467  *
1468  * Shows real state of the ring buffer if it is enabled or not.
1469  */
1470 bool tracer_tracing_is_on(struct trace_array *tr)
1471 {
1472 	if (tr->array_buffer.buffer)
1473 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1474 	return !tr->buffer_disabled;
1475 }
1476 
1477 /**
1478  * tracing_is_on - show state of ring buffers enabled
1479  */
1480 int tracing_is_on(void)
1481 {
1482 	return tracer_tracing_is_on(&global_trace);
1483 }
1484 EXPORT_SYMBOL_GPL(tracing_is_on);
1485 
1486 static int __init set_buf_size(char *str)
1487 {
1488 	unsigned long buf_size;
1489 
1490 	if (!str)
1491 		return 0;
1492 	buf_size = memparse(str, &str);
1493 	/*
1494 	 * nr_entries can not be zero and the startup
1495 	 * tests require some buffer space. Therefore
1496 	 * ensure we have at least 4096 bytes of buffer.
1497 	 */
1498 	trace_buf_size = max(4096UL, buf_size);
1499 	return 1;
1500 }
1501 __setup("trace_buf_size=", set_buf_size);
1502 
1503 static int __init set_tracing_thresh(char *str)
1504 {
1505 	unsigned long threshold;
1506 	int ret;
1507 
1508 	if (!str)
1509 		return 0;
1510 	ret = kstrtoul(str, 0, &threshold);
1511 	if (ret < 0)
1512 		return 0;
1513 	tracing_thresh = threshold * 1000;
1514 	return 1;
1515 }
1516 __setup("tracing_thresh=", set_tracing_thresh);
1517 
1518 unsigned long nsecs_to_usecs(unsigned long nsecs)
1519 {
1520 	return nsecs / 1000;
1521 }
1522 
1523 /*
1524  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1525  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1526  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1527  * of strings in the order that the evals (enum) were defined.
1528  */
1529 #undef C
1530 #define C(a, b) b
1531 
1532 /* These must match the bit positions in trace_iterator_flags */
1533 static const char *trace_options[] = {
1534 	TRACE_FLAGS
1535 	NULL
1536 };
1537 
1538 static struct {
1539 	u64 (*func)(void);
1540 	const char *name;
1541 	int in_ns;		/* is this clock in nanoseconds? */
1542 } trace_clocks[] = {
1543 	{ trace_clock_local,		"local",	1 },
1544 	{ trace_clock_global,		"global",	1 },
1545 	{ trace_clock_counter,		"counter",	0 },
1546 	{ trace_clock_jiffies,		"uptime",	0 },
1547 	{ trace_clock,			"perf",		1 },
1548 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1549 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1550 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1551 	ARCH_TRACE_CLOCKS
1552 };
1553 
1554 bool trace_clock_in_ns(struct trace_array *tr)
1555 {
1556 	if (trace_clocks[tr->clock_id].in_ns)
1557 		return true;
1558 
1559 	return false;
1560 }
1561 
1562 /*
1563  * trace_parser_get_init - gets the buffer for trace parser
1564  */
1565 int trace_parser_get_init(struct trace_parser *parser, int size)
1566 {
1567 	memset(parser, 0, sizeof(*parser));
1568 
1569 	parser->buffer = kmalloc(size, GFP_KERNEL);
1570 	if (!parser->buffer)
1571 		return 1;
1572 
1573 	parser->size = size;
1574 	return 0;
1575 }
1576 
1577 /*
1578  * trace_parser_put - frees the buffer for trace parser
1579  */
1580 void trace_parser_put(struct trace_parser *parser)
1581 {
1582 	kfree(parser->buffer);
1583 	parser->buffer = NULL;
1584 }
1585 
1586 /*
1587  * trace_get_user - reads the user input string separated by space
1588  * (matched by isspace(ch))
1589  *
1590  * For each string found the 'struct trace_parser' is updated,
1591  * and the function returns.
1592  *
1593  * Returns number of bytes read.
1594  *
1595  * See kernel/trace/trace.h for 'struct trace_parser' details.
1596  */
1597 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1598 	size_t cnt, loff_t *ppos)
1599 {
1600 	char ch;
1601 	size_t read = 0;
1602 	ssize_t ret;
1603 
1604 	if (!*ppos)
1605 		trace_parser_clear(parser);
1606 
1607 	ret = get_user(ch, ubuf++);
1608 	if (ret)
1609 		goto out;
1610 
1611 	read++;
1612 	cnt--;
1613 
1614 	/*
1615 	 * The parser is not finished with the last write,
1616 	 * continue reading the user input without skipping spaces.
1617 	 */
1618 	if (!parser->cont) {
1619 		/* skip white space */
1620 		while (cnt && isspace(ch)) {
1621 			ret = get_user(ch, ubuf++);
1622 			if (ret)
1623 				goto out;
1624 			read++;
1625 			cnt--;
1626 		}
1627 
1628 		parser->idx = 0;
1629 
1630 		/* only spaces were written */
1631 		if (isspace(ch) || !ch) {
1632 			*ppos += read;
1633 			ret = read;
1634 			goto out;
1635 		}
1636 	}
1637 
1638 	/* read the non-space input */
1639 	while (cnt && !isspace(ch) && ch) {
1640 		if (parser->idx < parser->size - 1)
1641 			parser->buffer[parser->idx++] = ch;
1642 		else {
1643 			ret = -EINVAL;
1644 			goto out;
1645 		}
1646 		ret = get_user(ch, ubuf++);
1647 		if (ret)
1648 			goto out;
1649 		read++;
1650 		cnt--;
1651 	}
1652 
1653 	/* We either got finished input or we have to wait for another call. */
1654 	if (isspace(ch) || !ch) {
1655 		parser->buffer[parser->idx] = 0;
1656 		parser->cont = false;
1657 	} else if (parser->idx < parser->size - 1) {
1658 		parser->cont = true;
1659 		parser->buffer[parser->idx++] = ch;
1660 		/* Make sure the parsed string always terminates with '\0'. */
1661 		parser->buffer[parser->idx] = 0;
1662 	} else {
1663 		ret = -EINVAL;
1664 		goto out;
1665 	}
1666 
1667 	*ppos += read;
1668 	ret = read;
1669 
1670 out:
1671 	return ret;
1672 }
1673 
1674 /* TODO add a seq_buf_to_buffer() */
1675 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676 {
1677 	int len;
1678 
1679 	if (trace_seq_used(s) <= s->seq.readpos)
1680 		return -EBUSY;
1681 
1682 	len = trace_seq_used(s) - s->seq.readpos;
1683 	if (cnt > len)
1684 		cnt = len;
1685 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686 
1687 	s->seq.readpos += cnt;
1688 	return cnt;
1689 }
1690 
1691 unsigned long __read_mostly	tracing_thresh;
1692 
1693 #ifdef CONFIG_TRACER_MAX_TRACE
1694 static const struct file_operations tracing_max_lat_fops;
1695 
1696 #ifdef LATENCY_FS_NOTIFY
1697 
1698 static struct workqueue_struct *fsnotify_wq;
1699 
1700 static void latency_fsnotify_workfn(struct work_struct *work)
1701 {
1702 	struct trace_array *tr = container_of(work, struct trace_array,
1703 					      fsnotify_work);
1704 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1705 }
1706 
1707 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1708 {
1709 	struct trace_array *tr = container_of(iwork, struct trace_array,
1710 					      fsnotify_irqwork);
1711 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1712 }
1713 
1714 static void trace_create_maxlat_file(struct trace_array *tr,
1715 				     struct dentry *d_tracer)
1716 {
1717 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1718 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1719 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1720 					      TRACE_MODE_WRITE,
1721 					      d_tracer, tr,
1722 					      &tracing_max_lat_fops);
1723 }
1724 
1725 __init static int latency_fsnotify_init(void)
1726 {
1727 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1728 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1729 	if (!fsnotify_wq) {
1730 		pr_err("Unable to allocate tr_max_lat_wq\n");
1731 		return -ENOMEM;
1732 	}
1733 	return 0;
1734 }
1735 
1736 late_initcall_sync(latency_fsnotify_init);
1737 
1738 void latency_fsnotify(struct trace_array *tr)
1739 {
1740 	if (!fsnotify_wq)
1741 		return;
1742 	/*
1743 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1744 	 * possible that we are called from __schedule() or do_idle(), which
1745 	 * could cause a deadlock.
1746 	 */
1747 	irq_work_queue(&tr->fsnotify_irqwork);
1748 }
1749 
1750 #else /* !LATENCY_FS_NOTIFY */
1751 
1752 #define trace_create_maxlat_file(tr, d_tracer)				\
1753 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1754 			  d_tracer, tr, &tracing_max_lat_fops)
1755 
1756 #endif
1757 
1758 /*
1759  * Copy the new maximum trace into the separate maximum-trace
1760  * structure. (this way the maximum trace is permanently saved,
1761  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1762  */
1763 static void
1764 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1765 {
1766 	struct array_buffer *trace_buf = &tr->array_buffer;
1767 	struct array_buffer *max_buf = &tr->max_buffer;
1768 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1769 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1770 
1771 	max_buf->cpu = cpu;
1772 	max_buf->time_start = data->preempt_timestamp;
1773 
1774 	max_data->saved_latency = tr->max_latency;
1775 	max_data->critical_start = data->critical_start;
1776 	max_data->critical_end = data->critical_end;
1777 
1778 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1779 	max_data->pid = tsk->pid;
1780 	/*
1781 	 * If tsk == current, then use current_uid(), as that does not use
1782 	 * RCU. The irq tracer can be called out of RCU scope.
1783 	 */
1784 	if (tsk == current)
1785 		max_data->uid = current_uid();
1786 	else
1787 		max_data->uid = task_uid(tsk);
1788 
1789 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1790 	max_data->policy = tsk->policy;
1791 	max_data->rt_priority = tsk->rt_priority;
1792 
1793 	/* record this task's comm */
1794 	tracing_record_cmdline(tsk);
1795 	latency_fsnotify(tr);
1796 }
1797 
1798 /**
1799  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1800  * @tr: tracer
1801  * @tsk: the task with the latency
1802  * @cpu: The cpu that initiated the trace.
1803  * @cond_data: User data associated with a conditional snapshot
1804  *
1805  * Flip the buffers between the @tr and the max_tr and record information
1806  * about which task was the cause of this latency.
1807  */
1808 void
1809 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1810 	      void *cond_data)
1811 {
1812 	if (tr->stop_count)
1813 		return;
1814 
1815 	WARN_ON_ONCE(!irqs_disabled());
1816 
1817 	if (!tr->allocated_snapshot) {
1818 		/* Only the nop tracer should hit this when disabling */
1819 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1820 		return;
1821 	}
1822 
1823 	arch_spin_lock(&tr->max_lock);
1824 
1825 	/* Inherit the recordable setting from array_buffer */
1826 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1827 		ring_buffer_record_on(tr->max_buffer.buffer);
1828 	else
1829 		ring_buffer_record_off(tr->max_buffer.buffer);
1830 
1831 #ifdef CONFIG_TRACER_SNAPSHOT
1832 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1833 		arch_spin_unlock(&tr->max_lock);
1834 		return;
1835 	}
1836 #endif
1837 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1838 
1839 	__update_max_tr(tr, tsk, cpu);
1840 
1841 	arch_spin_unlock(&tr->max_lock);
1842 
1843 	/* Any waiters on the old snapshot buffer need to wake up */
1844 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1845 }
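/*
 * Illustrative sketch (hypothetical, condensed from how the latency
 * tracers use this): update_max_tr() is called with interrupts disabled
 * once a new worst-case latency has been measured, e.g.
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 *
 * "delta" stands for whatever latency the tracer just measured.
 */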
1846 
1847 /**
1848  * update_max_tr_single - only copy one trace over, and reset the rest
1849  * @tr: tracer
1850  * @tsk: task with the latency
1851  * @cpu: the cpu of the buffer to copy.
1852  *
1853  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1854  */
1855 void
1856 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1857 {
1858 	int ret;
1859 
1860 	if (tr->stop_count)
1861 		return;
1862 
1863 	WARN_ON_ONCE(!irqs_disabled());
1864 	if (!tr->allocated_snapshot) {
1865 		/* Only the nop tracer should hit this when disabling */
1866 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1867 		return;
1868 	}
1869 
1870 	arch_spin_lock(&tr->max_lock);
1871 
1872 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1873 
1874 	if (ret == -EBUSY) {
1875 		/*
1876 		 * We failed to swap the buffer due to a commit taking
1877 		 * place on this CPU. We fail to record, but we reset
1878 		 * the max trace buffer (no one writes directly to it)
1879 		 * and flag that it failed.
1880 		 * The swap can also fail if a buffer resize is in progress.
1881 		 */
1882 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1883 			"Failed to swap buffers due to commit or resize in progress\n");
1884 	}
1885 
1886 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1887 
1888 	__update_max_tr(tr, tsk, cpu);
1889 	arch_spin_unlock(&tr->max_lock);
1890 }
1891 
1892 #endif /* CONFIG_TRACER_MAX_TRACE */
1893 
1894 static int wait_on_pipe(struct trace_iterator *iter, int full)
1895 {
1896 	int ret;
1897 
1898 	/* Iterators are static, they should be filled or empty */
1899 	if (trace_buffer_iter(iter, iter->cpu_file))
1900 		return 0;
1901 
1902 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1903 
1904 #ifdef CONFIG_TRACER_MAX_TRACE
1905 	/*
1906 	 * Make sure this is still the snapshot buffer, as if a snapshot were
1907 	 * to happen, this would now be the main buffer.
1908 	 */
1909 	if (iter->snapshot)
1910 		iter->array_buffer = &iter->tr->max_buffer;
1911 #endif
1912 	return ret;
1913 }
1914 
1915 #ifdef CONFIG_FTRACE_STARTUP_TEST
1916 static bool selftests_can_run;
1917 
1918 struct trace_selftests {
1919 	struct list_head		list;
1920 	struct tracer			*type;
1921 };
1922 
1923 static LIST_HEAD(postponed_selftests);
1924 
1925 static int save_selftest(struct tracer *type)
1926 {
1927 	struct trace_selftests *selftest;
1928 
1929 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1930 	if (!selftest)
1931 		return -ENOMEM;
1932 
1933 	selftest->type = type;
1934 	list_add(&selftest->list, &postponed_selftests);
1935 	return 0;
1936 }
1937 
1938 static int run_tracer_selftest(struct tracer *type)
1939 {
1940 	struct trace_array *tr = &global_trace;
1941 	struct tracer *saved_tracer = tr->current_trace;
1942 	int ret;
1943 
1944 	if (!type->selftest || tracing_selftest_disabled)
1945 		return 0;
1946 
1947 	/*
1948 	 * If a tracer registers early in boot up (before scheduling is
1949 	 * initialized and such), then do not run its selftests yet.
1950 	 * Instead, run it a little later in the boot process.
1951 	 */
1952 	if (!selftests_can_run)
1953 		return save_selftest(type);
1954 
1955 	if (!tracing_is_on()) {
1956 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1957 			type->name);
1958 		return 0;
1959 	}
1960 
1961 	/*
1962 	 * Run a selftest on this tracer.
1963 	 * Here we reset the trace buffer, and set the current
1964 	 * tracer to be this tracer. The tracer can then run some
1965 	 * internal tracing to verify that everything is in order.
1966 	 * If we fail, we do not register this tracer.
1967 	 */
1968 	tracing_reset_online_cpus(&tr->array_buffer);
1969 
1970 	tr->current_trace = type;
1971 
1972 #ifdef CONFIG_TRACER_MAX_TRACE
1973 	if (type->use_max_tr) {
1974 		/* If we expanded the buffers, make sure the max is expanded too */
1975 		if (ring_buffer_expanded)
1976 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1977 					   RING_BUFFER_ALL_CPUS);
1978 		tr->allocated_snapshot = true;
1979 	}
1980 #endif
1981 
1982 	/* the test is responsible for initializing and enabling */
1983 	pr_info("Testing tracer %s: ", type->name);
1984 	ret = type->selftest(type, tr);
1985 	/* the test is responsible for resetting too */
1986 	tr->current_trace = saved_tracer;
1987 	if (ret) {
1988 		printk(KERN_CONT "FAILED!\n");
1989 		/* Add the warning after printing 'FAILED' */
1990 		WARN_ON(1);
1991 		return -1;
1992 	}
1993 	/* Only reset on passing, to avoid touching corrupted buffers */
1994 	tracing_reset_online_cpus(&tr->array_buffer);
1995 
1996 #ifdef CONFIG_TRACER_MAX_TRACE
1997 	if (type->use_max_tr) {
1998 		tr->allocated_snapshot = false;
1999 
2000 		/* Shrink the max buffer again */
2001 		if (ring_buffer_expanded)
2002 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2003 					   RING_BUFFER_ALL_CPUS);
2004 	}
2005 #endif
2006 
2007 	printk(KERN_CONT "PASSED\n");
2008 	return 0;
2009 }
2010 
2011 static __init int init_trace_selftests(void)
2012 {
2013 	struct trace_selftests *p, *n;
2014 	struct tracer *t, **last;
2015 	int ret;
2016 
2017 	selftests_can_run = true;
2018 
2019 	mutex_lock(&trace_types_lock);
2020 
2021 	if (list_empty(&postponed_selftests))
2022 		goto out;
2023 
2024 	pr_info("Running postponed tracer tests:\n");
2025 
2026 	tracing_selftest_running = true;
2027 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2028 		/* This loop can take minutes when sanitizers are enabled, so
2029 		 * let's make sure we allow RCU processing.
2030 		 */
2031 		cond_resched();
2032 		ret = run_tracer_selftest(p->type);
2033 		/* If the test fails, then warn and remove from available_tracers */
2034 		if (ret < 0) {
2035 			WARN(1, "tracer: %s failed selftest, disabling\n",
2036 			     p->type->name);
2037 			last = &trace_types;
2038 			for (t = trace_types; t; t = t->next) {
2039 				if (t == p->type) {
2040 					*last = t->next;
2041 					break;
2042 				}
2043 				last = &t->next;
2044 			}
2045 		}
2046 		list_del(&p->list);
2047 		kfree(p);
2048 	}
2049 	tracing_selftest_running = false;
2050 
2051  out:
2052 	mutex_unlock(&trace_types_lock);
2053 
2054 	return 0;
2055 }
2056 core_initcall(init_trace_selftests);
2057 #else
2058 static inline int run_tracer_selftest(struct tracer *type)
2059 {
2060 	return 0;
2061 }
2062 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2063 
2064 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2065 
2066 static void __init apply_trace_boot_options(void);
2067 
2068 /**
2069  * register_tracer - register a tracer with the ftrace system.
2070  * @type: the plugin for the tracer
2071  *
2072  * Register a new plugin tracer.
2073  */
2074 int __init register_tracer(struct tracer *type)
2075 {
2076 	struct tracer *t;
2077 	int ret = 0;
2078 
2079 	if (!type->name) {
2080 		pr_info("Tracer must have a name\n");
2081 		return -1;
2082 	}
2083 
2084 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2085 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2086 		return -1;
2087 	}
2088 
2089 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2090 		pr_warn("Can not register tracer %s due to lockdown\n",
2091 			   type->name);
2092 		return -EPERM;
2093 	}
2094 
2095 	mutex_lock(&trace_types_lock);
2096 
2097 	tracing_selftest_running = true;
2098 
2099 	for (t = trace_types; t; t = t->next) {
2100 		if (strcmp(type->name, t->name) == 0) {
2101 			/* already found */
2102 			pr_info("Tracer %s already registered\n",
2103 				type->name);
2104 			ret = -1;
2105 			goto out;
2106 		}
2107 	}
2108 
2109 	if (!type->set_flag)
2110 		type->set_flag = &dummy_set_flag;
2111 	if (!type->flags) {
2112 		/* allocate a dummy tracer_flags */
2113 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2114 		if (!type->flags) {
2115 			ret = -ENOMEM;
2116 			goto out;
2117 		}
2118 		type->flags->val = 0;
2119 		type->flags->opts = dummy_tracer_opt;
2120 	} else
2121 		if (!type->flags->opts)
2122 			type->flags->opts = dummy_tracer_opt;
2123 
2124 	/* store the tracer for __set_tracer_option */
2125 	type->flags->trace = type;
2126 
2127 	ret = run_tracer_selftest(type);
2128 	if (ret < 0)
2129 		goto out;
2130 
2131 	type->next = trace_types;
2132 	trace_types = type;
2133 	add_tracer_options(&global_trace, type);
2134 
2135  out:
2136 	tracing_selftest_running = false;
2137 	mutex_unlock(&trace_types_lock);
2138 
2139 	if (ret || !default_bootup_tracer)
2140 		goto out_unlock;
2141 
2142 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2143 		goto out_unlock;
2144 
2145 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2146 	/* Do we want this tracer to start on bootup? */
2147 	tracing_set_tracer(&global_trace, type->name);
2148 	default_bootup_tracer = NULL;
2149 
2150 	apply_trace_boot_options();
2151 
2152 	/* Disable other selftests, since running this tracer will break them. */
2153 	disable_tracing_selftest("running a tracer");
2154 
2155  out_unlock:
2156 	return ret;
2157 }
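/*
 * Illustrative sketch (hypothetical tracer, not part of this file): the
 * minimum a plugin has to provide before calling register_tracer(). All
 * "sample_*" names are made up.
 *
 *	static int sample_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void sample_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer sample_tracer __read_mostly = {
 *		.name	= "sample",
 *		.init	= sample_tracer_init,
 *		.reset	= sample_tracer_reset,
 *	};
 *
 *	static __init int sample_tracer_register(void)
 *	{
 *		return register_tracer(&sample_tracer);
 *	}
 *	core_initcall(sample_tracer_register);
 */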
2158 
2159 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2160 {
2161 	struct trace_buffer *buffer = buf->buffer;
2162 
2163 	if (!buffer)
2164 		return;
2165 
2166 	ring_buffer_record_disable(buffer);
2167 
2168 	/* Make sure all commits have finished */
2169 	synchronize_rcu();
2170 	ring_buffer_reset_cpu(buffer, cpu);
2171 
2172 	ring_buffer_record_enable(buffer);
2173 }
2174 
2175 void tracing_reset_online_cpus(struct array_buffer *buf)
2176 {
2177 	struct trace_buffer *buffer = buf->buffer;
2178 
2179 	if (!buffer)
2180 		return;
2181 
2182 	ring_buffer_record_disable(buffer);
2183 
2184 	/* Make sure all commits have finished */
2185 	synchronize_rcu();
2186 
2187 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2188 
2189 	ring_buffer_reset_online_cpus(buffer);
2190 
2191 	ring_buffer_record_enable(buffer);
2192 }
2193 
2194 /* Must have trace_types_lock held */
2195 void tracing_reset_all_online_cpus_unlocked(void)
2196 {
2197 	struct trace_array *tr;
2198 
2199 	lockdep_assert_held(&trace_types_lock);
2200 
2201 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2202 		if (!tr->clear_trace)
2203 			continue;
2204 		tr->clear_trace = false;
2205 		tracing_reset_online_cpus(&tr->array_buffer);
2206 #ifdef CONFIG_TRACER_MAX_TRACE
2207 		tracing_reset_online_cpus(&tr->max_buffer);
2208 #endif
2209 	}
2210 }
2211 
2212 void tracing_reset_all_online_cpus(void)
2213 {
2214 	mutex_lock(&trace_types_lock);
2215 	tracing_reset_all_online_cpus_unlocked();
2216 	mutex_unlock(&trace_types_lock);
2217 }
2218 
2219 /*
2220  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2221  * is the tgid last observed corresponding to pid=i.
2222  */
2223 static int *tgid_map;
2224 
2225 /* The maximum valid index into tgid_map. */
2226 static size_t tgid_map_max;
2227 
2228 #define SAVED_CMDLINES_DEFAULT 128
2229 #define NO_CMDLINE_MAP UINT_MAX
2230 /*
2231  * Preemption must be disabled before acquiring trace_cmdline_lock.
2232  * The various trace_arrays' max_lock must be acquired in a context
2233  * where interrupt is disabled.
2234  */
2235 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2236 struct saved_cmdlines_buffer {
2237 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2238 	unsigned *map_cmdline_to_pid;
2239 	unsigned cmdline_num;
2240 	int cmdline_idx;
2241 	char saved_cmdlines[];
2242 };
2243 static struct saved_cmdlines_buffer *savedcmd;
2244 
2245 static inline char *get_saved_cmdlines(int idx)
2246 {
2247 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2248 }
2249 
2250 static inline void set_cmdline(int idx, const char *cmdline)
2251 {
2252 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2253 }
2254 
2255 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2256 {
2257 	int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2258 
2259 	kfree(s->map_cmdline_to_pid);
2260 	kmemleak_free(s);
2261 	free_pages((unsigned long)s, order);
2262 }
2263 
2264 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2265 {
2266 	struct saved_cmdlines_buffer *s;
2267 	struct page *page;
2268 	int orig_size, size;
2269 	int order;
2270 
2271 	/* Figure out how much is needed to hold the given number of cmdlines */
2272 	orig_size = sizeof(*s) + val * TASK_COMM_LEN;
2273 	order = get_order(orig_size);
2274 	size = 1 << (order + PAGE_SHIFT);
2275 	page = alloc_pages(GFP_KERNEL, order);
2276 	if (!page)
2277 		return NULL;
2278 
2279 	s = page_address(page);
2280 	kmemleak_alloc(s, size, 1, GFP_KERNEL);
2281 	memset(s, 0, sizeof(*s));
2282 
2283 	/* Round up to actual allocation */
2284 	val = (size - sizeof(*s)) / TASK_COMM_LEN;
2285 	s->cmdline_num = val;
2286 
2287 	s->map_cmdline_to_pid = kmalloc_array(val,
2288 					      sizeof(*s->map_cmdline_to_pid),
2289 					      GFP_KERNEL);
2290 	if (!s->map_cmdline_to_pid) {
2291 		free_saved_cmdlines_buffer(s);
2292 		return NULL;
2293 	}
2294 
2295 	s->cmdline_idx = 0;
2296 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2297 	       sizeof(s->map_pid_to_cmdline));
2298 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2299 	       val * sizeof(*s->map_cmdline_to_pid));
2300 
2301 	return s;
2302 }
2303 
2304 static int trace_create_savedcmd(void)
2305 {
2306 	savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2307 
2308 	return savedcmd ? 0 : -ENOMEM;
2309 }
2310 
2311 int is_tracing_stopped(void)
2312 {
2313 	return global_trace.stop_count;
2314 }
2315 
2316 static void tracing_start_tr(struct trace_array *tr)
2317 {
2318 	struct trace_buffer *buffer;
2319 	unsigned long flags;
2320 
2321 	if (tracing_disabled)
2322 		return;
2323 
2324 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2325 	if (--tr->stop_count) {
2326 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2327 			/* Someone screwed up their debugging */
2328 			tr->stop_count = 0;
2329 		}
2330 		goto out;
2331 	}
2332 
2333 	/* Prevent the buffers from switching */
2334 	arch_spin_lock(&tr->max_lock);
2335 
2336 	buffer = tr->array_buffer.buffer;
2337 	if (buffer)
2338 		ring_buffer_record_enable(buffer);
2339 
2340 #ifdef CONFIG_TRACER_MAX_TRACE
2341 	buffer = tr->max_buffer.buffer;
2342 	if (buffer)
2343 		ring_buffer_record_enable(buffer);
2344 #endif
2345 
2346 	arch_spin_unlock(&tr->max_lock);
2347 
2348  out:
2349 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2350 }
2351 
2352 /**
2353  * tracing_start - quick start of the tracer
2354  *
2355  * If tracing is enabled but was stopped by tracing_stop,
2356  * this will start the tracer back up.
2357  */
2358 void tracing_start(void)
2359 
2360 {
2361 	return tracing_start_tr(&global_trace);
2362 }
2363 
2364 static void tracing_stop_tr(struct trace_array *tr)
2365 {
2366 	struct trace_buffer *buffer;
2367 	unsigned long flags;
2368 
2369 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2370 	if (tr->stop_count++)
2371 		goto out;
2372 
2373 	/* Prevent the buffers from switching */
2374 	arch_spin_lock(&tr->max_lock);
2375 
2376 	buffer = tr->array_buffer.buffer;
2377 	if (buffer)
2378 		ring_buffer_record_disable(buffer);
2379 
2380 #ifdef CONFIG_TRACER_MAX_TRACE
2381 	buffer = tr->max_buffer.buffer;
2382 	if (buffer)
2383 		ring_buffer_record_disable(buffer);
2384 #endif
2385 
2386 	arch_spin_unlock(&tr->max_lock);
2387 
2388  out:
2389 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2390 }
2391 
2392 /**
2393  * tracing_stop - quick stop of the tracer
2394  *
2395  * Light weight way to stop tracing. Use in conjunction with
2396  * tracing_start.
2397  */
2398 void tracing_stop(void)
2399 {
2400 	return tracing_stop_tr(&global_trace);
2401 }
2402 
2403 static int trace_save_cmdline(struct task_struct *tsk)
2404 {
2405 	unsigned tpid, idx;
2406 
2407 	/* treat recording of idle task as a success */
2408 	if (!tsk->pid)
2409 		return 1;
2410 
2411 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2412 
2413 	/*
2414 	 * It's not the end of the world if we don't get
2415 	 * the lock, but we also don't want to spin
2416 	 * nor do we want to disable interrupts,
2417 	 * so if we miss here, then better luck next time.
2418 	 *
2419 	 * This is called from within the scheduler and from wakeups, so
2420 	 * interrupts should already be disabled and the run queue lock held.
2421 	 */
2422 	lockdep_assert_preemption_disabled();
2423 	if (!arch_spin_trylock(&trace_cmdline_lock))
2424 		return 0;
2425 
2426 	idx = savedcmd->map_pid_to_cmdline[tpid];
2427 	if (idx == NO_CMDLINE_MAP) {
2428 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2429 
2430 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2431 		savedcmd->cmdline_idx = idx;
2432 	}
2433 
2434 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2435 	set_cmdline(idx, tsk->comm);
2436 
2437 	arch_spin_unlock(&trace_cmdline_lock);
2438 
2439 	return 1;
2440 }
2441 
2442 static void __trace_find_cmdline(int pid, char comm[])
2443 {
2444 	unsigned map;
2445 	int tpid;
2446 
2447 	if (!pid) {
2448 		strcpy(comm, "<idle>");
2449 		return;
2450 	}
2451 
2452 	if (WARN_ON_ONCE(pid < 0)) {
2453 		strcpy(comm, "<XXX>");
2454 		return;
2455 	}
2456 
2457 	tpid = pid & (PID_MAX_DEFAULT - 1);
2458 	map = savedcmd->map_pid_to_cmdline[tpid];
2459 	if (map != NO_CMDLINE_MAP) {
2460 		tpid = savedcmd->map_cmdline_to_pid[map];
2461 		if (tpid == pid) {
2462 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2463 			return;
2464 		}
2465 	}
2466 	strcpy(comm, "<...>");
2467 }
2468 
2469 void trace_find_cmdline(int pid, char comm[])
2470 {
2471 	preempt_disable();
2472 	arch_spin_lock(&trace_cmdline_lock);
2473 
2474 	__trace_find_cmdline(pid, comm);
2475 
2476 	arch_spin_unlock(&trace_cmdline_lock);
2477 	preempt_enable();
2478 }
2479 
2480 static int *trace_find_tgid_ptr(int pid)
2481 {
2482 	/*
2483 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2484 	 * if we observe a non-NULL tgid_map then we also observe the correct
2485 	 * tgid_map_max.
2486 	 */
2487 	int *map = smp_load_acquire(&tgid_map);
2488 
2489 	if (unlikely(!map || pid > tgid_map_max))
2490 		return NULL;
2491 
2492 	return &map[pid];
2493 }
2494 
2495 int trace_find_tgid(int pid)
2496 {
2497 	int *ptr = trace_find_tgid_ptr(pid);
2498 
2499 	return ptr ? *ptr : 0;
2500 }
2501 
2502 static int trace_save_tgid(struct task_struct *tsk)
2503 {
2504 	int *ptr;
2505 
2506 	/* treat recording of idle task as a success */
2507 	if (!tsk->pid)
2508 		return 1;
2509 
2510 	ptr = trace_find_tgid_ptr(tsk->pid);
2511 	if (!ptr)
2512 		return 0;
2513 
2514 	*ptr = tsk->tgid;
2515 	return 1;
2516 }
2517 
2518 static bool tracing_record_taskinfo_skip(int flags)
2519 {
2520 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2521 		return true;
2522 	if (!__this_cpu_read(trace_taskinfo_save))
2523 		return true;
2524 	return false;
2525 }
2526 
2527 /**
2528  * tracing_record_taskinfo - record the task info of a task
2529  *
2530  * @task:  task to record
2531  * @flags: TRACE_RECORD_CMDLINE for recording comm
2532  *         TRACE_RECORD_TGID for recording tgid
2533  */
2534 void tracing_record_taskinfo(struct task_struct *task, int flags)
2535 {
2536 	bool done;
2537 
2538 	if (tracing_record_taskinfo_skip(flags))
2539 		return;
2540 
2541 	/*
2542 	 * Record as much task information as possible. If some fail, continue
2543 	 * to try to record the others.
2544 	 */
2545 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2546 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2547 
2548 	/* If recording any information failed, retry soon. */
2549 	if (!done)
2550 		return;
2551 
2552 	__this_cpu_write(trace_taskinfo_save, false);
2553 }
2554 
2555 /**
2556  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2557  *
2558  * @prev: previous task during sched_switch
2559  * @next: next task during sched_switch
2560  * @flags: TRACE_RECORD_CMDLINE for recording comm
2561  *         TRACE_RECORD_TGID for recording tgid
2562  */
2563 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2564 					  struct task_struct *next, int flags)
2565 {
2566 	bool done;
2567 
2568 	if (tracing_record_taskinfo_skip(flags))
2569 		return;
2570 
2571 	/*
2572 	 * Record as much task information as possible. If some fail, continue
2573 	 * to try to record the others.
2574 	 */
2575 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2576 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2577 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2578 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2579 
2580 	/* If recording any information failed, retry soon. */
2581 	if (!done)
2582 		return;
2583 
2584 	__this_cpu_write(trace_taskinfo_save, false);
2585 }
2586 
2587 /* Helpers to record a specific task information */
2588 void tracing_record_cmdline(struct task_struct *task)
2589 {
2590 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2591 }
2592 
2593 void tracing_record_tgid(struct task_struct *task)
2594 {
2595 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2596 }
2597 
2598 /*
2599  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2600  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2601  * simplifies those functions and keeps them in sync.
2602  */
2603 enum print_line_t trace_handle_return(struct trace_seq *s)
2604 {
2605 	return trace_seq_has_overflowed(s) ?
2606 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2607 }
2608 EXPORT_SYMBOL_GPL(trace_handle_return);
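/*
 * Illustrative sketch (hypothetical event, not part of this file): the
 * helper above is meant to end an event's print callback, e.g.
 *
 *	static enum print_line_t sample_event_print(struct trace_iterator *iter,
 *						    int flags,
 *						    struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "sample event hit\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */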
2609 
2610 static unsigned short migration_disable_value(void)
2611 {
2612 #if defined(CONFIG_SMP)
2613 	return current->migration_disabled;
2614 #else
2615 	return 0;
2616 #endif
2617 }
2618 
2619 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2620 {
2621 	unsigned int trace_flags = irqs_status;
2622 	unsigned int pc;
2623 
2624 	pc = preempt_count();
2625 
2626 	if (pc & NMI_MASK)
2627 		trace_flags |= TRACE_FLAG_NMI;
2628 	if (pc & HARDIRQ_MASK)
2629 		trace_flags |= TRACE_FLAG_HARDIRQ;
2630 	if (in_serving_softirq())
2631 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2632 
2633 	if (tif_need_resched())
2634 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2635 	if (test_preempt_need_resched())
2636 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2637 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2638 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2639 }
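/*
 * Worked example (ignoring the caller-supplied irqs_status bits): the
 * returned context word packs the clamped preempt count into bits 0-3,
 * the clamped migration-disable depth into bits 4-7 and the TRACE_FLAG_*
 * bits into bits 16 and up. With preempt_count() == 2, migration
 * disabled once and a hard interrupt being serviced, the result is
 *
 *	(TRACE_FLAG_HARDIRQ << 16) | (1 << 4) | 2
 */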
2640 
2641 struct ring_buffer_event *
2642 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2643 			  int type,
2644 			  unsigned long len,
2645 			  unsigned int trace_ctx)
2646 {
2647 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2648 }
2649 
2650 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2651 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2652 static int trace_buffered_event_ref;
2653 
2654 /**
2655  * trace_buffered_event_enable - enable buffering events
2656  *
2657  * When events are being filtered, it is quicker to use a temporary
2658  * buffer to write the event data into if there's a likely chance
2659  * that it will not be committed. Discarding an event from the ring
2660  * buffer is not as fast as committing one, and is much slower than
2661  * copying the data into a temporary buffer and committing that instead.
2662  *
2663  * When an event is to be filtered, allocate per-CPU buffers to
2664  * write the event data into. If the event is filtered and discarded,
2665  * it is simply dropped; otherwise, the entire data is committed
2666  * in one shot.
2667  */
2668 void trace_buffered_event_enable(void)
2669 {
2670 	struct ring_buffer_event *event;
2671 	struct page *page;
2672 	int cpu;
2673 
2674 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2675 
2676 	if (trace_buffered_event_ref++)
2677 		return;
2678 
2679 	for_each_tracing_cpu(cpu) {
2680 		page = alloc_pages_node(cpu_to_node(cpu),
2681 					GFP_KERNEL | __GFP_NORETRY, 0);
2682 		/* This is just an optimization and can handle failures */
2683 		if (!page) {
2684 			pr_err("Failed to allocate event buffer\n");
2685 			break;
2686 		}
2687 
2688 		event = page_address(page);
2689 		memset(event, 0, sizeof(*event));
2690 
2691 		per_cpu(trace_buffered_event, cpu) = event;
2692 
2693 		preempt_disable();
2694 		if (cpu == smp_processor_id() &&
2695 		    __this_cpu_read(trace_buffered_event) !=
2696 		    per_cpu(trace_buffered_event, cpu))
2697 			WARN_ON_ONCE(1);
2698 		preempt_enable();
2699 	}
2700 }
2701 
2702 static void enable_trace_buffered_event(void *data)
2703 {
2704 	/* Probably not needed, but do it anyway */
2705 	smp_rmb();
2706 	this_cpu_dec(trace_buffered_event_cnt);
2707 }
2708 
2709 static void disable_trace_buffered_event(void *data)
2710 {
2711 	this_cpu_inc(trace_buffered_event_cnt);
2712 }
2713 
2714 /**
2715  * trace_buffered_event_disable - disable buffering events
2716  *
2717  * When a filter is removed, it is faster to not use the buffered
2718  * events, and to commit directly into the ring buffer. Free up
2719  * the temp buffers when there are no more users. This requires
2720  * special synchronization with current events.
2721  */
2722 void trace_buffered_event_disable(void)
2723 {
2724 	int cpu;
2725 
2726 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2727 
2728 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2729 		return;
2730 
2731 	if (--trace_buffered_event_ref)
2732 		return;
2733 
2734 	/* For each CPU, set the buffer as used. */
2735 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2736 			 NULL, true);
2737 
2738 	/* Wait for all current users to finish */
2739 	synchronize_rcu();
2740 
2741 	for_each_tracing_cpu(cpu) {
2742 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2743 		per_cpu(trace_buffered_event, cpu) = NULL;
2744 	}
2745 
2746 	/*
2747 	 * Wait for all CPUs that potentially started checking if they can use
2748 	 * their event buffer only after the previous synchronize_rcu() call and
2749 	 * they still read a valid pointer from trace_buffered_event. It must be
2750 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2751 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2752 	 */
2753 	synchronize_rcu();
2754 
2755 	/* For each CPU, relinquish the buffer */
2756 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2757 			 true);
2758 }
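/*
 * Illustrative sketch (condensed usage, not part of this file): the
 * enable/disable calls are reference counted and must be paired under
 * event_mutex, which is how the event filter code drives them:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		// filter being attached
 *	...
 *	trace_buffered_event_disable();		// filter being removed
 *	mutex_unlock(&event_mutex);
 */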
2759 
2760 static struct trace_buffer *temp_buffer;
2761 
2762 struct ring_buffer_event *
2763 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2764 			  struct trace_event_file *trace_file,
2765 			  int type, unsigned long len,
2766 			  unsigned int trace_ctx)
2767 {
2768 	struct ring_buffer_event *entry;
2769 	struct trace_array *tr = trace_file->tr;
2770 	int val;
2771 
2772 	*current_rb = tr->array_buffer.buffer;
2773 
2774 	if (!tr->no_filter_buffering_ref &&
2775 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2776 	    (entry = this_cpu_read(trace_buffered_event))) {
2777 		/*
2778 		 * Filtering is on, so try to use the per cpu buffer first.
2779 		 * This buffer will simulate a ring_buffer_event,
2780 		 * where the type_len is zero and the array[0] will
2781 		 * hold the full length.
2782 		 * (see include/linux/ring_buffer.h for details on
2783 		 *  how the ring_buffer_event is structured).
2784 		 *
2785 		 * Using a temp buffer during filtering and copying it
2786 		 * on a matched filter is quicker than writing directly
2787 		 * into the ring buffer and then discarding it when
2788 		 * it doesn't match. That is because the discard
2789 		 * requires several atomic operations to get right.
2790 		 * Copying on a match and doing nothing on a failed match
2791 		 * is still quicker than skipping the copy but having
2792 		 * to discard from the ring buffer on a failed match.
2793 		 */
2794 		int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2795 
2796 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2797 
2798 		/*
2799 		 * Preemption is disabled, but interrupts and NMIs
2800 		 * can still come in now. If that happens after
2801 		 * the above increment, then it will have to go
2802 		 * back to the old method of allocating the event
2803 		 * on the ring buffer, and if the filter fails, it
2804 		 * will have to call ring_buffer_discard_commit()
2805 		 * to remove it.
2806 		 *
2807 		 * Need to also check the unlikely case that the
2808 		 * length is bigger than the temp buffer size.
2809 		 * If that happens, then the reserve is pretty much
2810 		 * guaranteed to fail, as the ring buffer currently
2811 		 * only allows events less than a page. But that may
2812 		 * change in the future, so let the ring buffer reserve
2813 		 * handle the failure in that case.
2814 		 */
2815 		if (val == 1 && likely(len <= max_len)) {
2816 			trace_event_setup(entry, type, trace_ctx);
2817 			entry->array[0] = len;
2818 			return entry;
2819 		}
2820 		this_cpu_dec(trace_buffered_event_cnt);
2821 	}
2822 
2823 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2824 					    trace_ctx);
2825 	/*
2826 	 * If tracing is off, but we have triggers enabled
2827 	 * we still need to look at the event data. Use the temp_buffer
2828 	 * to store the trace event for the trigger to use. It's recursion
2829 	 * safe and will not be recorded anywhere.
2830 	 */
2831 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2832 		*current_rb = temp_buffer;
2833 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2834 						    trace_ctx);
2835 	}
2836 	return entry;
2837 }
2838 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2839 
2840 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2841 static DEFINE_MUTEX(tracepoint_printk_mutex);
2842 
2843 static void output_printk(struct trace_event_buffer *fbuffer)
2844 {
2845 	struct trace_event_call *event_call;
2846 	struct trace_event_file *file;
2847 	struct trace_event *event;
2848 	unsigned long flags;
2849 	struct trace_iterator *iter = tracepoint_print_iter;
2850 
2851 	/* We should never get here if iter is NULL */
2852 	if (WARN_ON_ONCE(!iter))
2853 		return;
2854 
2855 	event_call = fbuffer->trace_file->event_call;
2856 	if (!event_call || !event_call->event.funcs ||
2857 	    !event_call->event.funcs->trace)
2858 		return;
2859 
2860 	file = fbuffer->trace_file;
2861 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2862 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2863 	     !filter_match_preds(file->filter, fbuffer->entry)))
2864 		return;
2865 
2866 	event = &fbuffer->trace_file->event_call->event;
2867 
2868 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2869 	trace_seq_init(&iter->seq);
2870 	iter->ent = fbuffer->entry;
2871 	event_call->event.funcs->trace(iter, 0, event);
2872 	trace_seq_putc(&iter->seq, 0);
2873 	printk("%s", iter->seq.buffer);
2874 
2875 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2876 }
2877 
2878 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2879 			     void *buffer, size_t *lenp,
2880 			     loff_t *ppos)
2881 {
2882 	int save_tracepoint_printk;
2883 	int ret;
2884 
2885 	mutex_lock(&tracepoint_printk_mutex);
2886 	save_tracepoint_printk = tracepoint_printk;
2887 
2888 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2889 
2890 	/*
2891 	 * This will force exiting early, as tracepoint_printk
2892 	 * is always zero when tracepoint_print_iter is not allocated.
2893 	 */
2894 	if (!tracepoint_print_iter)
2895 		tracepoint_printk = 0;
2896 
2897 	if (save_tracepoint_printk == tracepoint_printk)
2898 		goto out;
2899 
2900 	if (tracepoint_printk)
2901 		static_key_enable(&tracepoint_printk_key.key);
2902 	else
2903 		static_key_disable(&tracepoint_printk_key.key);
2904 
2905  out:
2906 	mutex_unlock(&tracepoint_printk_mutex);
2907 
2908 	return ret;
2909 }
2910 
2911 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2912 {
2913 	enum event_trigger_type tt = ETT_NONE;
2914 	struct trace_event_file *file = fbuffer->trace_file;
2915 
2916 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2917 			fbuffer->entry, &tt))
2918 		goto discard;
2919 
2920 	if (static_key_false(&tracepoint_printk_key.key))
2921 		output_printk(fbuffer);
2922 
2923 	if (static_branch_unlikely(&trace_event_exports_enabled))
2924 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2925 
2926 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2927 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2928 
2929 discard:
2930 	if (tt)
2931 		event_triggers_post_call(file, tt);
2932 
2933 }
2934 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
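/*
 * Illustrative sketch (hypothetical event struct, not part of this file):
 * the reserve/commit pairing as used by the generated
 * trace_event_raw_event_*() functions:
 *
 *	struct trace_event_buffer fbuffer;
 *	struct trace_event_raw_sample *entry;
 *
 *	entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry));
 *	if (!entry)
 *		return;
 *	entry->value = value;
 *	trace_event_buffer_commit(&fbuffer);
 */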
2935 
2936 /*
2937  * Skip 3:
2938  *
2939  *   trace_buffer_unlock_commit_regs()
2940  *   trace_event_buffer_commit()
2941  *   trace_event_raw_event_xxx()
2942  */
2943 # define STACK_SKIP 3
2944 
2945 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2946 				     struct trace_buffer *buffer,
2947 				     struct ring_buffer_event *event,
2948 				     unsigned int trace_ctx,
2949 				     struct pt_regs *regs)
2950 {
2951 	__buffer_unlock_commit(buffer, event);
2952 
2953 	/*
2954 	 * If regs is not set, then skip the wrapper functions listed above.
2955 	 * Note, we can still get here via blktrace, wakeup tracer
2956 	 * and mmiotrace, but that's ok if they lose a function or
2957 	 * two. They are not that meaningful.
2958 	 */
2959 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2960 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2961 }
2962 
2963 /*
2964  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2965  */
2966 void
2967 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2968 				   struct ring_buffer_event *event)
2969 {
2970 	__buffer_unlock_commit(buffer, event);
2971 }
2972 
2973 void
2974 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2975 	       parent_ip, unsigned int trace_ctx)
2976 {
2977 	struct trace_event_call *call = &event_function;
2978 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2979 	struct ring_buffer_event *event;
2980 	struct ftrace_entry *entry;
2981 
2982 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2983 					    trace_ctx);
2984 	if (!event)
2985 		return;
2986 	entry	= ring_buffer_event_data(event);
2987 	entry->ip			= ip;
2988 	entry->parent_ip		= parent_ip;
2989 
2990 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2991 		if (static_branch_unlikely(&trace_function_exports_enabled))
2992 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2993 		__buffer_unlock_commit(buffer, event);
2994 	}
2995 }
2996 
2997 #ifdef CONFIG_STACKTRACE
2998 
2999 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3000 #define FTRACE_KSTACK_NESTING	4
3001 
3002 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
3003 
3004 struct ftrace_stack {
3005 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
3006 };
3007 
3008 
3009 struct ftrace_stacks {
3010 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3011 };
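/*
 * Sizing note (assuming 4 KiB pages and 64-bit longs): FTRACE_KSTACK_ENTRIES
 * is 4096 / 4 = 1024, so one ftrace_stack is 1024 * 8 = 8 KiB and the
 * per-CPU ftrace_stacks array (4 nesting levels) takes 32 KiB per CPU.
 */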
3012 
3013 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3014 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3015 
3016 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3017 				 unsigned int trace_ctx,
3018 				 int skip, struct pt_regs *regs)
3019 {
3020 	struct trace_event_call *call = &event_kernel_stack;
3021 	struct ring_buffer_event *event;
3022 	unsigned int size, nr_entries;
3023 	struct ftrace_stack *fstack;
3024 	struct stack_entry *entry;
3025 	int stackidx;
3026 
3027 	/*
3028 	 * Add one for this function and the call to stack_trace_save().
3029 	 * If regs is set, then these functions will not be in the way.
3030 	 */
3031 #ifndef CONFIG_UNWINDER_ORC
3032 	if (!regs)
3033 		skip++;
3034 #endif
3035 
3036 	preempt_disable_notrace();
3037 
3038 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3039 
3040 	/* This should never happen. If it does, yell once and skip */
3041 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3042 		goto out;
3043 
3044 	/*
3045 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3046 	 * interrupt will either see the value pre increment or post
3047 	 * increment. If the interrupt happens pre increment it will have
3048 	 * restored the counter when it returns.  We just need a barrier to
3049 	 * keep gcc from moving things around.
3050 	 */
3051 	barrier();
3052 
3053 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3054 	size = ARRAY_SIZE(fstack->calls);
3055 
3056 	if (regs) {
3057 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3058 						   size, skip);
3059 	} else {
3060 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3061 	}
3062 
3063 	size = nr_entries * sizeof(unsigned long);
3064 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3065 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
3066 				    trace_ctx);
3067 	if (!event)
3068 		goto out;
3069 	entry = ring_buffer_event_data(event);
3070 
3071 	memcpy(&entry->caller, fstack->calls, size);
3072 	entry->size = nr_entries;
3073 
3074 	if (!call_filter_check_discard(call, entry, buffer, event))
3075 		__buffer_unlock_commit(buffer, event);
3076 
3077  out:
3078 	/* Again, don't let gcc optimize things here */
3079 	barrier();
3080 	__this_cpu_dec(ftrace_stack_reserve);
3081 	preempt_enable_notrace();
3082 
3083 }
3084 
3085 static inline void ftrace_trace_stack(struct trace_array *tr,
3086 				      struct trace_buffer *buffer,
3087 				      unsigned int trace_ctx,
3088 				      int skip, struct pt_regs *regs)
3089 {
3090 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3091 		return;
3092 
3093 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3094 }
3095 
3096 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3097 		   int skip)
3098 {
3099 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3100 
3101 	if (rcu_is_watching()) {
3102 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3103 		return;
3104 	}
3105 
3106 	/*
3107 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3108 	 * but if the above rcu_is_watching() failed, then the NMI
3109 	 * triggered someplace critical, and rcu_irq_enter() should
3110 	 * not be called from NMI.
3111 	 */
3112 	if (unlikely(in_nmi()))
3113 		return;
3114 
3115 	rcu_irq_enter_irqson();
3116 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3117 	rcu_irq_exit_irqson();
3118 }
3119 
3120 /**
3121  * trace_dump_stack - record a stack back trace in the trace buffer
3122  * @skip: Number of functions to skip (helper handlers)
3123  */
3124 void trace_dump_stack(int skip)
3125 {
3126 	if (tracing_disabled || tracing_selftest_running)
3127 		return;
3128 
3129 #ifndef CONFIG_UNWINDER_ORC
3130 	/* Skip 1 to skip this function. */
3131 	skip++;
3132 #endif
3133 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3134 			     tracing_gen_ctx(), skip, NULL);
3135 }
3136 EXPORT_SYMBOL_GPL(trace_dump_stack);
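/*
 * Illustrative sketch (hypothetical condition name): typical ad-hoc use
 * while debugging, from any context where tracing is enabled:
 *
 *	if (unexpected_condition)
 *		trace_dump_stack(0);
 *
 * The backtrace then shows up as a <stack trace> entry in
 * /sys/kernel/tracing/trace.
 */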
3137 
3138 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3139 static DEFINE_PER_CPU(int, user_stack_count);
3140 
3141 static void
3142 ftrace_trace_userstack(struct trace_array *tr,
3143 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3144 {
3145 	struct trace_event_call *call = &event_user_stack;
3146 	struct ring_buffer_event *event;
3147 	struct userstack_entry *entry;
3148 
3149 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3150 		return;
3151 
3152 	/*
3153 	 * NMIs cannot handle page faults, even with fixups.
3154 	 * Saving the user stack can (and often does) fault.
3155 	 */
3156 	if (unlikely(in_nmi()))
3157 		return;
3158 
3159 	/*
3160 	 * prevent recursion, since the user stack tracing may
3161 	 * trigger other kernel events.
3162 	 */
3163 	preempt_disable();
3164 	if (__this_cpu_read(user_stack_count))
3165 		goto out;
3166 
3167 	__this_cpu_inc(user_stack_count);
3168 
3169 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3170 					    sizeof(*entry), trace_ctx);
3171 	if (!event)
3172 		goto out_drop_count;
3173 	entry	= ring_buffer_event_data(event);
3174 
3175 	entry->tgid		= current->tgid;
3176 	memset(&entry->caller, 0, sizeof(entry->caller));
3177 
3178 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3179 	if (!call_filter_check_discard(call, entry, buffer, event))
3180 		__buffer_unlock_commit(buffer, event);
3181 
3182  out_drop_count:
3183 	__this_cpu_dec(user_stack_count);
3184  out:
3185 	preempt_enable();
3186 }
3187 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3188 static void ftrace_trace_userstack(struct trace_array *tr,
3189 				   struct trace_buffer *buffer,
3190 				   unsigned int trace_ctx)
3191 {
3192 }
3193 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3194 
3195 #endif /* CONFIG_STACKTRACE */
3196 
3197 static inline void
3198 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3199 			  unsigned long long delta)
3200 {
3201 	entry->bottom_delta_ts = delta & U32_MAX;
3202 	entry->top_delta_ts = (delta >> 32);
3203 }
3204 
3205 void trace_last_func_repeats(struct trace_array *tr,
3206 			     struct trace_func_repeats *last_info,
3207 			     unsigned int trace_ctx)
3208 {
3209 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3210 	struct func_repeats_entry *entry;
3211 	struct ring_buffer_event *event;
3212 	u64 delta;
3213 
3214 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3215 					    sizeof(*entry), trace_ctx);
3216 	if (!event)
3217 		return;
3218 
3219 	delta = ring_buffer_event_time_stamp(buffer, event) -
3220 		last_info->ts_last_call;
3221 
3222 	entry = ring_buffer_event_data(event);
3223 	entry->ip = last_info->ip;
3224 	entry->parent_ip = last_info->parent_ip;
3225 	entry->count = last_info->count;
3226 	func_repeats_set_delta_ts(entry, delta);
3227 
3228 	__buffer_unlock_commit(buffer, event);
3229 }
3230 
3231 /* created for use with alloc_percpu */
3232 struct trace_buffer_struct {
3233 	int nesting;
3234 	char buffer[4][TRACE_BUF_SIZE];
3235 };
3236 
3237 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3238 
3239 /*
3240  * This allows for lockless recording.  If we're nested too deeply, then
3241  * this returns NULL.
3242  */
3243 static char *get_trace_buf(void)
3244 {
3245 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3246 
3247 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3248 		return NULL;
3249 
3250 	buffer->nesting++;
3251 
3252 	/* Interrupts must see nesting incremented before we use the buffer */
3253 	barrier();
3254 	return &buffer->buffer[buffer->nesting - 1][0];
3255 }
3256 
3257 static void put_trace_buf(void)
3258 {
3259 	/* Don't let the decrement of nesting leak before this */
3260 	barrier();
3261 	this_cpu_dec(trace_percpu_buffer->nesting);
3262 }
3263 
3264 static int alloc_percpu_trace_buffer(void)
3265 {
3266 	struct trace_buffer_struct __percpu *buffers;
3267 
3268 	if (trace_percpu_buffer)
3269 		return 0;
3270 
3271 	buffers = alloc_percpu(struct trace_buffer_struct);
3272 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3273 		return -ENOMEM;
3274 
3275 	trace_percpu_buffer = buffers;
3276 	return 0;
3277 }
3278 
3279 static int buffers_allocated;
3280 
3281 void trace_printk_init_buffers(void)
3282 {
3283 	if (buffers_allocated)
3284 		return;
3285 
3286 	if (alloc_percpu_trace_buffer())
3287 		return;
3288 
3289 	/* trace_printk() is for debug use only. Don't use it in production. */
3290 
3291 	pr_warn("\n");
3292 	pr_warn("**********************************************************\n");
3293 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3294 	pr_warn("**                                                      **\n");
3295 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3296 	pr_warn("**                                                      **\n");
3297 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3298 	pr_warn("** unsafe for production use.                           **\n");
3299 	pr_warn("**                                                      **\n");
3300 	pr_warn("** If you see this message and you are not debugging    **\n");
3301 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3302 	pr_warn("**                                                      **\n");
3303 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3304 	pr_warn("**********************************************************\n");
3305 
3306 	/* Expand the buffers to set size */
3307 	tracing_update_buffers();
3308 
3309 	buffers_allocated = 1;
3310 
3311 	/*
3312 	 * trace_printk_init_buffers() can be called by modules.
3313 	 * If that happens, then we need to start cmdline recording
3314 	 * directly here. If the global_trace.buffer is already
3315 	 * allocated here, then this was called by module code.
3316 	 */
3317 	if (global_trace.array_buffer.buffer)
3318 		tracing_start_cmdline_record();
3319 }
3320 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3321 
3322 void trace_printk_start_comm(void)
3323 {
3324 	/* Start tracing comms if trace printk is set */
3325 	if (!buffers_allocated)
3326 		return;
3327 	tracing_start_cmdline_record();
3328 }
3329 
3330 static void trace_printk_start_stop_comm(int enabled)
3331 {
3332 	if (!buffers_allocated)
3333 		return;
3334 
3335 	if (enabled)
3336 		tracing_start_cmdline_record();
3337 	else
3338 		tracing_stop_cmdline_record();
3339 }
3340 
3341 /**
3342  * trace_vbprintk - write binary msg to tracing buffer
3343  * @ip:    The address of the caller
3344  * @fmt:   The string format to write to the buffer
3345  * @args:  Arguments for @fmt
3346  */
3347 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3348 {
3349 	struct trace_event_call *call = &event_bprint;
3350 	struct ring_buffer_event *event;
3351 	struct trace_buffer *buffer;
3352 	struct trace_array *tr = &global_trace;
3353 	struct bprint_entry *entry;
3354 	unsigned int trace_ctx;
3355 	char *tbuffer;
3356 	int len = 0, size;
3357 
3358 	if (unlikely(tracing_selftest_running || tracing_disabled))
3359 		return 0;
3360 
3361 	/* Don't pollute graph traces with trace_vprintk internals */
3362 	pause_graph_tracing();
3363 
3364 	trace_ctx = tracing_gen_ctx();
3365 	preempt_disable_notrace();
3366 
3367 	tbuffer = get_trace_buf();
3368 	if (!tbuffer) {
3369 		len = 0;
3370 		goto out_nobuffer;
3371 	}
3372 
3373 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3374 
3375 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3376 		goto out_put;
3377 
3378 	size = sizeof(*entry) + sizeof(u32) * len;
3379 	buffer = tr->array_buffer.buffer;
3380 	ring_buffer_nest_start(buffer);
3381 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3382 					    trace_ctx);
3383 	if (!event)
3384 		goto out;
3385 	entry = ring_buffer_event_data(event);
3386 	entry->ip			= ip;
3387 	entry->fmt			= fmt;
3388 
3389 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3390 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3391 		__buffer_unlock_commit(buffer, event);
3392 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3393 	}
3394 
3395 out:
3396 	ring_buffer_nest_end(buffer);
3397 out_put:
3398 	put_trace_buf();
3399 
3400 out_nobuffer:
3401 	preempt_enable_notrace();
3402 	unpause_graph_tracing();
3403 
3404 	return len;
3405 }
3406 EXPORT_SYMBOL_GPL(trace_vbprintk);
3407 
3408 __printf(3, 0)
3409 static int
3410 __trace_array_vprintk(struct trace_buffer *buffer,
3411 		      unsigned long ip, const char *fmt, va_list args)
3412 {
3413 	struct trace_event_call *call = &event_print;
3414 	struct ring_buffer_event *event;
3415 	int len = 0, size;
3416 	struct print_entry *entry;
3417 	unsigned int trace_ctx;
3418 	char *tbuffer;
3419 
3420 	if (tracing_disabled || tracing_selftest_running)
3421 		return 0;
3422 
3423 	/* Don't pollute graph traces with trace_vprintk internals */
3424 	pause_graph_tracing();
3425 
3426 	trace_ctx = tracing_gen_ctx();
3427 	preempt_disable_notrace();
3428 
3429 
3430 	tbuffer = get_trace_buf();
3431 	if (!tbuffer) {
3432 		len = 0;
3433 		goto out_nobuffer;
3434 	}
3435 
3436 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3437 
3438 	size = sizeof(*entry) + len + 1;
3439 	ring_buffer_nest_start(buffer);
3440 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3441 					    trace_ctx);
3442 	if (!event)
3443 		goto out;
3444 	entry = ring_buffer_event_data(event);
3445 	entry->ip = ip;
3446 
3447 	memcpy(&entry->buf, tbuffer, len + 1);
3448 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3449 		__buffer_unlock_commit(buffer, event);
3450 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3451 	}
3452 
3453 out:
3454 	ring_buffer_nest_end(buffer);
3455 	put_trace_buf();
3456 
3457 out_nobuffer:
3458 	preempt_enable_notrace();
3459 	unpause_graph_tracing();
3460 
3461 	return len;
3462 }
3463 
3464 __printf(3, 0)
3465 int trace_array_vprintk(struct trace_array *tr,
3466 			unsigned long ip, const char *fmt, va_list args)
3467 {
3468 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3469 }
3470 
3471 /**
3472  * trace_array_printk - Print a message to a specific instance
3473  * @tr: The instance trace_array descriptor
3474  * @ip: The instruction pointer that this is called from.
3475  * @fmt: The format to print (printf format)
3476  *
3477  * If a subsystem sets up its own instance, it has the right to
3478  * printk strings into its tracing instance buffer using this
3479  * function. Note, this function will not write into the top level
3480  * buffer (use trace_printk() for that), as the top level buffer
3481  * should only contain events that can be individually disabled.
3482  * trace_printk() is only for debugging a kernel, and should never
3483  * be incorporated into normal use.
3484  *
3485  * trace_array_printk() can be used, as it will not add noise to the
3486  * top level tracing buffer.
3487  *
3488  * Note, trace_array_init_printk() must be called on @tr before this
3489  * can be used.
3490  */
3491 __printf(3, 0)
3492 int trace_array_printk(struct trace_array *tr,
3493 		       unsigned long ip, const char *fmt, ...)
3494 {
3495 	int ret;
3496 	va_list ap;
3497 
3498 	if (!tr)
3499 		return -ENOENT;
3500 
3501 	/* This is only allowed for created instances */
3502 	if (tr == &global_trace)
3503 		return 0;
3504 
3505 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3506 		return 0;
3507 
3508 	va_start(ap, fmt);
3509 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3510 	va_end(ap);
3511 	return ret;
3512 }
3513 EXPORT_SYMBOL_GPL(trace_array_printk);
3514 
3515 /**
3516  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3517  * @tr: The trace array to initialize the buffers for
3518  *
3519  * As trace_array_printk() only writes into instances, calls to it
3520  * are OK to have in the kernel (unlike trace_printk()). This needs to
3521  * be called before trace_array_printk() can be used on a trace_array.
3522  */
3523 int trace_array_init_printk(struct trace_array *tr)
3524 {
3525 	if (!tr)
3526 		return -ENOENT;
3527 
3528 	/* This is only allowed for created instances */
3529 	if (tr == &global_trace)
3530 		return -EINVAL;
3531 
3532 	return alloc_percpu_trace_buffer();
3533 }
3534 EXPORT_SYMBOL_GPL(trace_array_init_printk);
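/*
 * Illustrative usage sketch (not part of this file): how a subsystem
 * might wire up trace_array_init_printk() and trace_array_printk() on
 * its own instance. The instance name "my_subsys" and the init/work
 * functions are assumptions for the example; trace_array_get_by_name()
 * is used in the single-name form exported by this kernel series.
 */
#if 0
static struct trace_array *my_tr;

static int __init my_subsys_trace_init(void)
{
	/* Create (or look up) a dedicated tracing instance */
	my_tr = trace_array_get_by_name("my_subsys");
	if (!my_tr)
		return -ENOMEM;

	/* Allocate the per-cpu buffers that trace_array_printk() needs */
	return trace_array_init_printk(my_tr);
}

static void my_subsys_do_work(int val)
{
	/* Writes only into the "my_subsys" instance, never the top level buffer */
	trace_array_printk(my_tr, _THIS_IP_, "did work: %d\n", val);
}

/* A real user would also trace_array_put() the instance when done with it. */
#endif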
3535 
3536 __printf(3, 4)
3537 int trace_array_printk_buf(struct trace_buffer *buffer,
3538 			   unsigned long ip, const char *fmt, ...)
3539 {
3540 	int ret;
3541 	va_list ap;
3542 
3543 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3544 		return 0;
3545 
3546 	va_start(ap, fmt);
3547 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3548 	va_end(ap);
3549 	return ret;
3550 }
3551 
3552 __printf(2, 0)
3553 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3554 {
3555 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3556 }
3557 EXPORT_SYMBOL_GPL(trace_vprintk);
3558 
3559 static void trace_iterator_increment(struct trace_iterator *iter)
3560 {
3561 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3562 
3563 	iter->idx++;
3564 	if (buf_iter)
3565 		ring_buffer_iter_advance(buf_iter);
3566 }
3567 
3568 static struct trace_entry *
3569 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3570 		unsigned long *lost_events)
3571 {
3572 	struct ring_buffer_event *event;
3573 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3574 
3575 	if (buf_iter) {
3576 		event = ring_buffer_iter_peek(buf_iter, ts);
3577 		if (lost_events)
3578 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3579 				(unsigned long)-1 : 0;
3580 	} else {
3581 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3582 					 lost_events);
3583 	}
3584 
3585 	if (event) {
3586 		iter->ent_size = ring_buffer_event_length(event);
3587 		return ring_buffer_event_data(event);
3588 	}
3589 	iter->ent_size = 0;
3590 	return NULL;
3591 }
3592 
3593 static struct trace_entry *
3594 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3595 		  unsigned long *missing_events, u64 *ent_ts)
3596 {
3597 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3598 	struct trace_entry *ent, *next = NULL;
3599 	unsigned long lost_events = 0, next_lost = 0;
3600 	int cpu_file = iter->cpu_file;
3601 	u64 next_ts = 0, ts;
3602 	int next_cpu = -1;
3603 	int next_size = 0;
3604 	int cpu;
3605 
3606 	/*
3607 	 * If we are in a per_cpu trace file, don't bother iterating over
3608 	 * all the CPUs; peek at that CPU directly.
3609 	 */
3610 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3611 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3612 			return NULL;
3613 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3614 		if (ent_cpu)
3615 			*ent_cpu = cpu_file;
3616 
3617 		return ent;
3618 	}
3619 
3620 	for_each_tracing_cpu(cpu) {
3621 
3622 		if (ring_buffer_empty_cpu(buffer, cpu))
3623 			continue;
3624 
3625 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3626 
3627 		/*
3628 		 * Pick the entry with the smallest timestamp:
3629 		 */
3630 		if (ent && (!next || ts < next_ts)) {
3631 			next = ent;
3632 			next_cpu = cpu;
3633 			next_ts = ts;
3634 			next_lost = lost_events;
3635 			next_size = iter->ent_size;
3636 		}
3637 	}
3638 
3639 	iter->ent_size = next_size;
3640 
3641 	if (ent_cpu)
3642 		*ent_cpu = next_cpu;
3643 
3644 	if (ent_ts)
3645 		*ent_ts = next_ts;
3646 
3647 	if (missing_events)
3648 		*missing_events = next_lost;
3649 
3650 	return next;
3651 }
3652 
3653 #define STATIC_FMT_BUF_SIZE	128
3654 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3655 
3656 static char *trace_iter_expand_format(struct trace_iterator *iter)
3657 {
3658 	char *tmp;
3659 
3660 	/*
3661 	 * iter->tr is NULL when used with tp_printk, which makes
3662 	 * this get called where it is not safe to call krealloc().
3663 	 */
3664 	if (!iter->tr || iter->fmt == static_fmt_buf)
3665 		return NULL;
3666 
3667 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3668 		       GFP_KERNEL);
3669 	if (tmp) {
3670 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3671 		iter->fmt = tmp;
3672 	}
3673 
3674 	return tmp;
3675 }
3676 
3677 /* Returns true if the string is safe to dereference from an event */
3678 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3679 			   bool star, int len)
3680 {
3681 	unsigned long addr = (unsigned long)str;
3682 	struct trace_event *trace_event;
3683 	struct trace_event_call *event;
3684 
3685 	/* Ignore strings with no length */
3686 	if (star && !len)
3687 		return true;
3688 
3689 	/* OK if part of the event data */
3690 	if ((addr >= (unsigned long)iter->ent) &&
3691 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3692 		return true;
3693 
3694 	/* OK if part of the temp seq buffer */
3695 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3696 	    (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3697 		return true;
3698 
3699 	/* Core rodata cannot be freed */
3700 	if (is_kernel_rodata(addr))
3701 		return true;
3702 
3703 	if (trace_is_tracepoint_string(str))
3704 		return true;
3705 
3706 	/*
3707 	 * Now this could be a module event, referencing core module
3708 	 * data, which is OK.
3709 	 */
3710 	if (!iter->ent)
3711 		return false;
3712 
3713 	trace_event = ftrace_find_event(iter->ent->type);
3714 	if (!trace_event)
3715 		return false;
3716 
3717 	event = container_of(trace_event, struct trace_event_call, event);
3718 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3719 		return false;
3720 
3721 	/* Would rather have rodata, but this will suffice */
3722 	if (within_module_core(addr, event->module))
3723 		return true;
3724 
3725 	return false;
3726 }
3727 
3728 static const char *show_buffer(struct trace_seq *s)
3729 {
3730 	struct seq_buf *seq = &s->seq;
3731 
3732 	seq_buf_terminate(seq);
3733 
3734 	return seq->buffer;
3735 }
3736 
3737 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3738 
3739 static int test_can_verify_check(const char *fmt, ...)
3740 {
3741 	char buf[16];
3742 	va_list ap;
3743 	int ret;
3744 
3745 	/*
3746 	 * The verifier depends on vsnprintf() modifying the va_list
3747 	 * passed to it, which requires that it be passed by reference.
3748 	 * Some architectures (like x86_32) pass it by value, which means
3749 	 * that vsnprintf() does not modify the caller's va_list, and the
3750 	 * verifier would then need to be able to understand all the values
3751 	 * that vsnprintf can use. If it is passed by value, then the
3752 	 * verifier is disabled.
3753 	 */
3754 	va_start(ap, fmt);
3755 	vsnprintf(buf, 16, "%d", ap);
3756 	ret = va_arg(ap, int);
3757 	va_end(ap);
3758 
3759 	return ret;
3760 }
3761 
3762 static void test_can_verify(void)
3763 {
3764 	if (!test_can_verify_check("%d %d", 0, 1)) {
3765 		pr_info("trace event string verifier disabled\n");
3766 		static_branch_inc(&trace_no_verify);
3767 	}
3768 }
3769 
3770 /**
3771  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3772  * @iter: The iterator that holds the seq buffer and the event being printed
3773  * @fmt: The format used to print the event
3774  * @ap: The va_list holding the data to print from @fmt.
3775  *
3776  * This writes the data into the @iter->seq buffer using the data from
3777  * @fmt and @ap. If the format has a %s, then the source of the string
3778  * is examined to make sure it is safe to print, otherwise it will
3779  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3780  * pointer.
3781  */
3782 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3783 			 va_list ap)
3784 {
3785 	const char *p = fmt;
3786 	const char *str;
3787 	int i, j;
3788 
3789 	if (WARN_ON_ONCE(!fmt))
3790 		return;
3791 
3792 	if (static_branch_unlikely(&trace_no_verify))
3793 		goto print;
3794 
3795 	/* Don't bother checking when doing a ftrace_dump() */
3796 	if (iter->fmt == static_fmt_buf)
3797 		goto print;
3798 
3799 	while (*p) {
3800 		bool star = false;
3801 		int len = 0;
3802 
3803 		j = 0;
3804 
3805 		/* We only care about %s and variants */
3806 		for (i = 0; p[i]; i++) {
3807 			if (i + 1 >= iter->fmt_size) {
3808 				/*
3809 				 * If we can't expand the copy buffer,
3810 				 * just print it.
3811 				 */
3812 				if (!trace_iter_expand_format(iter))
3813 					goto print;
3814 			}
3815 
3816 			if (p[i] == '\\' && p[i+1]) {
3817 				i++;
3818 				continue;
3819 			}
3820 			if (p[i] == '%') {
3821 				/* Need to test cases like %08.*s */
3822 				for (j = 1; p[i+j]; j++) {
3823 					if (isdigit(p[i+j]) ||
3824 					    p[i+j] == '.')
3825 						continue;
3826 					if (p[i+j] == '*') {
3827 						star = true;
3828 						continue;
3829 					}
3830 					break;
3831 				}
3832 				if (p[i+j] == 's')
3833 					break;
3834 				star = false;
3835 			}
3836 			j = 0;
3837 		}
3838 		/* If no %s found then just print normally */
3839 		if (!p[i])
3840 			break;
3841 
3842 		/* Copy up to the %s, and print that */
3843 		strncpy(iter->fmt, p, i);
3844 		iter->fmt[i] = '\0';
3845 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3846 
3847 		/*
3848 		 * If iter->seq is full, the above call no longer guarantees
3849 		 * that ap is in sync with fmt processing, and further calls
3850 		 * to va_arg() can return wrong positional arguments.
3851 		 *
3852 		 * Ensure that ap is no longer used in this case.
3853 		 */
3854 		if (iter->seq.full) {
3855 			p = "";
3856 			break;
3857 		}
3858 
3859 		if (star)
3860 			len = va_arg(ap, int);
3861 
3862 		/* The ap now points to the string data of the %s */
3863 		str = va_arg(ap, const char *);
3864 
3865 		/*
3866 		 * If you hit this warning, it is likely that the
3867 		 * trace event in question used %s on a string that
3868 		 * was saved at the time of the event, but may not be
3869 		 * around when the trace is read. Use __string(),
3870 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3871 		 * instead. See samples/trace_events/trace-events-sample.h
3872 		 * for reference.
3873 		 */
3874 		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3875 			      "fmt: '%s' current_buffer: '%s'",
3876 			      fmt, show_buffer(&iter->seq))) {
3877 			int ret;
3878 
3879 			/* Try to safely read the string */
3880 			if (star) {
3881 				if (len + 1 > iter->fmt_size)
3882 					len = iter->fmt_size - 1;
3883 				if (len < 0)
3884 					len = 0;
3885 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3886 				iter->fmt[len] = 0;
3887 				star = false;
3888 			} else {
3889 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3890 								  iter->fmt_size);
3891 			}
3892 			if (ret < 0)
3893 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3894 			else
3895 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3896 						 str, iter->fmt);
3897 			str = "[UNSAFE-MEMORY]";
3898 			strcpy(iter->fmt, "%s");
3899 		} else {
3900 			strncpy(iter->fmt, p + i, j + 1);
3901 			iter->fmt[j+1] = '\0';
3902 		}
3903 		if (star)
3904 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3905 		else
3906 			trace_seq_printf(&iter->seq, iter->fmt, str);
3907 
3908 		p += i + j + 1;
3909 	}
3910  print:
3911 	if (*p)
3912 		trace_seq_vprintf(&iter->seq, p, ap);
3913 }
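/*
 * Illustrative sketch (not part of this file) of the __string()/
 * __assign_str()/__get_str() pattern that the warning above recommends
 * instead of handing a raw "char *" to a "%s". The event name and fields
 * are made up for the example; see samples/trace_events/trace-events-sample.h
 * for the real reference.
 */
#if 0
TRACE_EVENT(my_example,

	TP_PROTO(const char *name, int val),

	TP_ARGS(name, val),

	TP_STRUCT__entry(
		__string(name, name)		/* reserves space in the event */
		__field(int, val)
	),

	TP_fast_assign(
		__assign_str(name, name);	/* copies the string into the event */
		__entry->val = val;
	),

	/* __get_str() reads the copy saved inside the event itself */
	TP_printk("name=%s val=%d", __get_str(name), __entry->val)
);
#endif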
3914 
3915 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3916 {
3917 	const char *p, *new_fmt;
3918 	char *q;
3919 
3920 	if (WARN_ON_ONCE(!fmt))
3921 		return fmt;
3922 
3923 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3924 		return fmt;
3925 
3926 	p = fmt;
3927 	new_fmt = q = iter->fmt;
3928 	while (*p) {
3929 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3930 			if (!trace_iter_expand_format(iter))
3931 				return fmt;
3932 
3933 			q += iter->fmt - new_fmt;
3934 			new_fmt = iter->fmt;
3935 		}
3936 
3937 		*q++ = *p++;
3938 
3939 		/* Replace %p with %px */
3940 		if (p[-1] == '%') {
3941 			if (p[0] == '%') {
3942 				*q++ = *p++;
3943 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3944 				*q++ = *p++;
3945 				*q++ = 'x';
3946 			}
3947 		}
3948 	}
3949 	*q = '\0';
3950 
3951 	return new_fmt;
3952 }
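/*
 * Example of the rewrite done above when hash-ptr is turned off
 * (illustrative format strings only): a bare %p is converted, while
 * %pS and a literal %% are left alone, so
 *   "addr=%p func=%pS pct=%%"  becomes  "addr=%px func=%pS pct=%%".
 */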
3953 
3954 #define STATIC_TEMP_BUF_SIZE	128
3955 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3956 
3957 /* Find the next real entry, without updating the iterator itself */
3958 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3959 					  int *ent_cpu, u64 *ent_ts)
3960 {
3961 	/* __find_next_entry will reset ent_size */
3962 	int ent_size = iter->ent_size;
3963 	struct trace_entry *entry;
3964 
3965 	/*
3966 	 * If called from ftrace_dump(), then the iter->temp buffer
3967 	 * will be the static_temp_buf and not created from kmalloc.
3968 	 * If the entry size is greater than the buffer, we can
3969 	 * not save it. Just return NULL in that case. This is only
3970 	 * used to add markers when two consecutive events' time
3971 	 * stamps have a large delta. See trace_print_lat_context()
3972 	 */
3973 	if (iter->temp == static_temp_buf &&
3974 	    STATIC_TEMP_BUF_SIZE < ent_size)
3975 		return NULL;
3976 
3977 	/*
3978 	 * The __find_next_entry() may call peek_next_entry(), which may
3979 	 * call ring_buffer_peek() that may make the contents of iter->ent
3980 	 * undefined. Need to copy iter->ent now.
3981 	 */
3982 	if (iter->ent && iter->ent != iter->temp) {
3983 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3984 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3985 			void *temp;
3986 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3987 			if (!temp)
3988 				return NULL;
3989 			kfree(iter->temp);
3990 			iter->temp = temp;
3991 			iter->temp_size = iter->ent_size;
3992 		}
3993 		memcpy(iter->temp, iter->ent, iter->ent_size);
3994 		iter->ent = iter->temp;
3995 	}
3996 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3997 	/* Put back the original ent_size */
3998 	iter->ent_size = ent_size;
3999 
4000 	return entry;
4001 }
4002 
4003 /* Find the next real entry, and increment the iterator to the next entry */
4004 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4005 {
4006 	iter->ent = __find_next_entry(iter, &iter->cpu,
4007 				      &iter->lost_events, &iter->ts);
4008 
4009 	if (iter->ent)
4010 		trace_iterator_increment(iter);
4011 
4012 	return iter->ent ? iter : NULL;
4013 }
4014 
4015 static void trace_consume(struct trace_iterator *iter)
4016 {
4017 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4018 			    &iter->lost_events);
4019 }
4020 
4021 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4022 {
4023 	struct trace_iterator *iter = m->private;
4024 	int i = (int)*pos;
4025 	void *ent;
4026 
4027 	WARN_ON_ONCE(iter->leftover);
4028 
4029 	(*pos)++;
4030 
4031 	/* can't go backwards */
4032 	if (iter->idx > i)
4033 		return NULL;
4034 
4035 	if (iter->idx < 0)
4036 		ent = trace_find_next_entry_inc(iter);
4037 	else
4038 		ent = iter;
4039 
4040 	while (ent && iter->idx < i)
4041 		ent = trace_find_next_entry_inc(iter);
4042 
4043 	iter->pos = *pos;
4044 
4045 	return ent;
4046 }
4047 
4048 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4049 {
4050 	struct ring_buffer_iter *buf_iter;
4051 	unsigned long entries = 0;
4052 	u64 ts;
4053 
4054 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4055 
4056 	buf_iter = trace_buffer_iter(iter, cpu);
4057 	if (!buf_iter)
4058 		return;
4059 
4060 	ring_buffer_iter_reset(buf_iter);
4061 
4062 	/*
4063 	 * With the max latency tracers, we could have the case that
4064 	 * a reset never took place on a cpu. This is evident from
4065 	 * the timestamp being before the start of the buffer.
4066 	 */
4067 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4068 		if (ts >= iter->array_buffer->time_start)
4069 			break;
4070 		entries++;
4071 		ring_buffer_iter_advance(buf_iter);
4072 	}
4073 
4074 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4075 }
4076 
4077 /*
4078  * The current tracer is copied to avoid taking a global lock
4079  * all around.
4080  */
4081 static void *s_start(struct seq_file *m, loff_t *pos)
4082 {
4083 	struct trace_iterator *iter = m->private;
4084 	struct trace_array *tr = iter->tr;
4085 	int cpu_file = iter->cpu_file;
4086 	void *p = NULL;
4087 	loff_t l = 0;
4088 	int cpu;
4089 
4090 	/*
4091 	 * Copy the tracer to avoid using a global lock all around.
4092 	 * iter->trace is a copy of current_trace, so the pointer to the
4093 	 * name may be used instead of a strcmp(), as iter->trace->name
4094 	 * will point to the same string as current_trace->name.
4095 	 */
4096 	mutex_lock(&trace_types_lock);
4097 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
4098 		/* Close iter->trace before switching to the new current tracer */
4099 		if (iter->trace->close)
4100 			iter->trace->close(iter);
4101 		*iter->trace = *tr->current_trace;
4102 		/* Reopen the new current tracer */
4103 		if (iter->trace->open)
4104 			iter->trace->open(iter);
4105 	}
4106 	mutex_unlock(&trace_types_lock);
4107 
4108 #ifdef CONFIG_TRACER_MAX_TRACE
4109 	if (iter->snapshot && iter->trace->use_max_tr)
4110 		return ERR_PTR(-EBUSY);
4111 #endif
4112 
4113 	if (*pos != iter->pos) {
4114 		iter->ent = NULL;
4115 		iter->cpu = 0;
4116 		iter->idx = -1;
4117 
4118 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4119 			for_each_tracing_cpu(cpu)
4120 				tracing_iter_reset(iter, cpu);
4121 		} else
4122 			tracing_iter_reset(iter, cpu_file);
4123 
4124 		iter->leftover = 0;
4125 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4126 			;
4127 
4128 	} else {
4129 		/*
4130 		 * If we overflowed the seq_file before, then we want
4131 		 * to just reuse the trace_seq buffer again.
4132 		 */
4133 		if (iter->leftover)
4134 			p = iter;
4135 		else {
4136 			l = *pos - 1;
4137 			p = s_next(m, p, &l);
4138 		}
4139 	}
4140 
4141 	trace_event_read_lock();
4142 	trace_access_lock(cpu_file);
4143 	return p;
4144 }
4145 
4146 static void s_stop(struct seq_file *m, void *p)
4147 {
4148 	struct trace_iterator *iter = m->private;
4149 
4150 #ifdef CONFIG_TRACER_MAX_TRACE
4151 	if (iter->snapshot && iter->trace->use_max_tr)
4152 		return;
4153 #endif
4154 
4155 	trace_access_unlock(iter->cpu_file);
4156 	trace_event_read_unlock();
4157 }
4158 
4159 static void
4160 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4161 		      unsigned long *entries, int cpu)
4162 {
4163 	unsigned long count;
4164 
4165 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4166 	/*
4167 	 * If this buffer has skipped entries, then we hold all
4168 	 * entries for the trace and we need to ignore the
4169 	 * ones before the time stamp.
4170 	 */
4171 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4172 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4173 		/* total is the same as the entries */
4174 		*total = count;
4175 	} else
4176 		*total = count +
4177 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4178 	*entries = count;
4179 }
4180 
4181 static void
4182 get_total_entries(struct array_buffer *buf,
4183 		  unsigned long *total, unsigned long *entries)
4184 {
4185 	unsigned long t, e;
4186 	int cpu;
4187 
4188 	*total = 0;
4189 	*entries = 0;
4190 
4191 	for_each_tracing_cpu(cpu) {
4192 		get_total_entries_cpu(buf, &t, &e, cpu);
4193 		*total += t;
4194 		*entries += e;
4195 	}
4196 }
4197 
4198 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4199 {
4200 	unsigned long total, entries;
4201 
4202 	if (!tr)
4203 		tr = &global_trace;
4204 
4205 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4206 
4207 	return entries;
4208 }
4209 
4210 unsigned long trace_total_entries(struct trace_array *tr)
4211 {
4212 	unsigned long total, entries;
4213 
4214 	if (!tr)
4215 		tr = &global_trace;
4216 
4217 	get_total_entries(&tr->array_buffer, &total, &entries);
4218 
4219 	return entries;
4220 }
4221 
4222 static void print_lat_help_header(struct seq_file *m)
4223 {
4224 	seq_puts(m, "#                    _------=> CPU#            \n"
4225 		    "#                   / _-----=> irqs-off        \n"
4226 		    "#                  | / _----=> need-resched    \n"
4227 		    "#                  || / _---=> hardirq/softirq \n"
4228 		    "#                  ||| / _--=> preempt-depth   \n"
4229 		    "#                  |||| / _-=> migrate-disable \n"
4230 		    "#                  ||||| /     delay           \n"
4231 		    "#  cmd     pid     |||||| time  |   caller     \n"
4232 		    "#     \\   /        ||||||  \\    |    /       \n");
4233 }
4234 
4235 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4236 {
4237 	unsigned long total;
4238 	unsigned long entries;
4239 
4240 	get_total_entries(buf, &total, &entries);
4241 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4242 		   entries, total, num_online_cpus());
4243 	seq_puts(m, "#\n");
4244 }
4245 
4246 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4247 				   unsigned int flags)
4248 {
4249 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4250 
4251 	print_event_info(buf, m);
4252 
4253 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4254 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4255 }
4256 
4257 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4258 				       unsigned int flags)
4259 {
4260 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4261 	const char *space = "            ";
4262 	int prec = tgid ? 12 : 2;
4263 
4264 	print_event_info(buf, m);
4265 
4266 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
4267 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4268 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4269 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4270 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4271 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4272 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4273 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4274 }
4275 
4276 void
4277 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4278 {
4279 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4280 	struct array_buffer *buf = iter->array_buffer;
4281 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4282 	struct tracer *type = iter->trace;
4283 	unsigned long entries;
4284 	unsigned long total;
4285 	const char *name = "preemption";
4286 
4287 	name = type->name;
4288 
4289 	get_total_entries(buf, &total, &entries);
4290 
4291 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4292 		   name, UTS_RELEASE);
4293 	seq_puts(m, "# -----------------------------------"
4294 		 "---------------------------------\n");
4295 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4296 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4297 		   nsecs_to_usecs(data->saved_latency),
4298 		   entries,
4299 		   total,
4300 		   buf->cpu,
4301 #if defined(CONFIG_PREEMPT_NONE)
4302 		   "server",
4303 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4304 		   "desktop",
4305 #elif defined(CONFIG_PREEMPT)
4306 		   "preempt",
4307 #elif defined(CONFIG_PREEMPT_RT)
4308 		   "preempt_rt",
4309 #else
4310 		   "unknown",
4311 #endif
4312 		   /* These are reserved for later use */
4313 		   0, 0, 0, 0);
4314 #ifdef CONFIG_SMP
4315 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4316 #else
4317 	seq_puts(m, ")\n");
4318 #endif
4319 	seq_puts(m, "#    -----------------\n");
4320 	seq_printf(m, "#    | task: %.16s-%d "
4321 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4322 		   data->comm, data->pid,
4323 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4324 		   data->policy, data->rt_priority);
4325 	seq_puts(m, "#    -----------------\n");
4326 
4327 	if (data->critical_start) {
4328 		seq_puts(m, "#  => started at: ");
4329 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4330 		trace_print_seq(m, &iter->seq);
4331 		seq_puts(m, "\n#  => ended at:   ");
4332 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4333 		trace_print_seq(m, &iter->seq);
4334 		seq_puts(m, "\n#\n");
4335 	}
4336 
4337 	seq_puts(m, "#\n");
4338 }
4339 
4340 static void test_cpu_buff_start(struct trace_iterator *iter)
4341 {
4342 	struct trace_seq *s = &iter->seq;
4343 	struct trace_array *tr = iter->tr;
4344 
4345 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4346 		return;
4347 
4348 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4349 		return;
4350 
4351 	if (cpumask_available(iter->started) &&
4352 	    cpumask_test_cpu(iter->cpu, iter->started))
4353 		return;
4354 
4355 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4356 		return;
4357 
4358 	if (cpumask_available(iter->started))
4359 		cpumask_set_cpu(iter->cpu, iter->started);
4360 
4361 	/* Don't print started cpu buffer for the first entry of the trace */
4362 	if (iter->idx > 1)
4363 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4364 				iter->cpu);
4365 }
4366 
4367 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4368 {
4369 	struct trace_array *tr = iter->tr;
4370 	struct trace_seq *s = &iter->seq;
4371 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4372 	struct trace_entry *entry;
4373 	struct trace_event *event;
4374 
4375 	entry = iter->ent;
4376 
4377 	test_cpu_buff_start(iter);
4378 
4379 	event = ftrace_find_event(entry->type);
4380 
4381 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4382 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4383 			trace_print_lat_context(iter);
4384 		else
4385 			trace_print_context(iter);
4386 	}
4387 
4388 	if (trace_seq_has_overflowed(s))
4389 		return TRACE_TYPE_PARTIAL_LINE;
4390 
4391 	if (event)
4392 		return event->funcs->trace(iter, sym_flags, event);
4393 
4394 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4395 
4396 	return trace_handle_return(s);
4397 }
4398 
4399 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4400 {
4401 	struct trace_array *tr = iter->tr;
4402 	struct trace_seq *s = &iter->seq;
4403 	struct trace_entry *entry;
4404 	struct trace_event *event;
4405 
4406 	entry = iter->ent;
4407 
4408 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4409 		trace_seq_printf(s, "%d %d %llu ",
4410 				 entry->pid, iter->cpu, iter->ts);
4411 
4412 	if (trace_seq_has_overflowed(s))
4413 		return TRACE_TYPE_PARTIAL_LINE;
4414 
4415 	event = ftrace_find_event(entry->type);
4416 	if (event)
4417 		return event->funcs->raw(iter, 0, event);
4418 
4419 	trace_seq_printf(s, "%d ?\n", entry->type);
4420 
4421 	return trace_handle_return(s);
4422 }
4423 
4424 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4425 {
4426 	struct trace_array *tr = iter->tr;
4427 	struct trace_seq *s = &iter->seq;
4428 	unsigned char newline = '\n';
4429 	struct trace_entry *entry;
4430 	struct trace_event *event;
4431 
4432 	entry = iter->ent;
4433 
4434 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4435 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4436 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4437 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4438 		if (trace_seq_has_overflowed(s))
4439 			return TRACE_TYPE_PARTIAL_LINE;
4440 	}
4441 
4442 	event = ftrace_find_event(entry->type);
4443 	if (event) {
4444 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4445 		if (ret != TRACE_TYPE_HANDLED)
4446 			return ret;
4447 	}
4448 
4449 	SEQ_PUT_FIELD(s, newline);
4450 
4451 	return trace_handle_return(s);
4452 }
4453 
4454 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4455 {
4456 	struct trace_array *tr = iter->tr;
4457 	struct trace_seq *s = &iter->seq;
4458 	struct trace_entry *entry;
4459 	struct trace_event *event;
4460 
4461 	entry = iter->ent;
4462 
4463 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4464 		SEQ_PUT_FIELD(s, entry->pid);
4465 		SEQ_PUT_FIELD(s, iter->cpu);
4466 		SEQ_PUT_FIELD(s, iter->ts);
4467 		if (trace_seq_has_overflowed(s))
4468 			return TRACE_TYPE_PARTIAL_LINE;
4469 	}
4470 
4471 	event = ftrace_find_event(entry->type);
4472 	return event ? event->funcs->binary(iter, 0, event) :
4473 		TRACE_TYPE_HANDLED;
4474 }
4475 
4476 int trace_empty(struct trace_iterator *iter)
4477 {
4478 	struct ring_buffer_iter *buf_iter;
4479 	int cpu;
4480 
4481 	/* If we are looking at one CPU buffer, only check that one */
4482 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4483 		cpu = iter->cpu_file;
4484 		buf_iter = trace_buffer_iter(iter, cpu);
4485 		if (buf_iter) {
4486 			if (!ring_buffer_iter_empty(buf_iter))
4487 				return 0;
4488 		} else {
4489 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4490 				return 0;
4491 		}
4492 		return 1;
4493 	}
4494 
4495 	for_each_tracing_cpu(cpu) {
4496 		buf_iter = trace_buffer_iter(iter, cpu);
4497 		if (buf_iter) {
4498 			if (!ring_buffer_iter_empty(buf_iter))
4499 				return 0;
4500 		} else {
4501 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4502 				return 0;
4503 		}
4504 	}
4505 
4506 	return 1;
4507 }
4508 
4509 /*  Called with trace_event_read_lock() held. */
4510 enum print_line_t print_trace_line(struct trace_iterator *iter)
4511 {
4512 	struct trace_array *tr = iter->tr;
4513 	unsigned long trace_flags = tr->trace_flags;
4514 	enum print_line_t ret;
4515 
4516 	if (iter->lost_events) {
4517 		if (iter->lost_events == (unsigned long)-1)
4518 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4519 					 iter->cpu);
4520 		else
4521 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4522 					 iter->cpu, iter->lost_events);
4523 		if (trace_seq_has_overflowed(&iter->seq))
4524 			return TRACE_TYPE_PARTIAL_LINE;
4525 	}
4526 
4527 	if (iter->trace && iter->trace->print_line) {
4528 		ret = iter->trace->print_line(iter);
4529 		if (ret != TRACE_TYPE_UNHANDLED)
4530 			return ret;
4531 	}
4532 
4533 	if (iter->ent->type == TRACE_BPUTS &&
4534 			trace_flags & TRACE_ITER_PRINTK &&
4535 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4536 		return trace_print_bputs_msg_only(iter);
4537 
4538 	if (iter->ent->type == TRACE_BPRINT &&
4539 			trace_flags & TRACE_ITER_PRINTK &&
4540 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4541 		return trace_print_bprintk_msg_only(iter);
4542 
4543 	if (iter->ent->type == TRACE_PRINT &&
4544 			trace_flags & TRACE_ITER_PRINTK &&
4545 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4546 		return trace_print_printk_msg_only(iter);
4547 
4548 	if (trace_flags & TRACE_ITER_BIN)
4549 		return print_bin_fmt(iter);
4550 
4551 	if (trace_flags & TRACE_ITER_HEX)
4552 		return print_hex_fmt(iter);
4553 
4554 	if (trace_flags & TRACE_ITER_RAW)
4555 		return print_raw_fmt(iter);
4556 
4557 	return print_trace_fmt(iter);
4558 }
4559 
4560 void trace_latency_header(struct seq_file *m)
4561 {
4562 	struct trace_iterator *iter = m->private;
4563 	struct trace_array *tr = iter->tr;
4564 
4565 	/* print nothing if the buffers are empty */
4566 	if (trace_empty(iter))
4567 		return;
4568 
4569 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4570 		print_trace_header(m, iter);
4571 
4572 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4573 		print_lat_help_header(m);
4574 }
4575 
4576 void trace_default_header(struct seq_file *m)
4577 {
4578 	struct trace_iterator *iter = m->private;
4579 	struct trace_array *tr = iter->tr;
4580 	unsigned long trace_flags = tr->trace_flags;
4581 
4582 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4583 		return;
4584 
4585 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4586 		/* print nothing if the buffers are empty */
4587 		if (trace_empty(iter))
4588 			return;
4589 		print_trace_header(m, iter);
4590 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4591 			print_lat_help_header(m);
4592 	} else {
4593 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4594 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4595 				print_func_help_header_irq(iter->array_buffer,
4596 							   m, trace_flags);
4597 			else
4598 				print_func_help_header(iter->array_buffer, m,
4599 						       trace_flags);
4600 		}
4601 	}
4602 }
4603 
4604 static void test_ftrace_alive(struct seq_file *m)
4605 {
4606 	if (!ftrace_is_dead())
4607 		return;
4608 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4609 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4610 }
4611 
4612 #ifdef CONFIG_TRACER_MAX_TRACE
4613 static void show_snapshot_main_help(struct seq_file *m)
4614 {
4615 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4616 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4617 		    "#                      Takes a snapshot of the main buffer.\n"
4618 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4619 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4620 		    "#                       is not a '0' or '1')\n");
4621 }
4622 
4623 static void show_snapshot_percpu_help(struct seq_file *m)
4624 {
4625 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4626 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4627 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4628 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4629 #else
4630 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4631 		    "#                     Must use main snapshot file to allocate.\n");
4632 #endif
4633 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4634 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4635 		    "#                       is not a '0' or '1')\n");
4636 }
4637 
4638 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4639 {
4640 	if (iter->tr->allocated_snapshot)
4641 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4642 	else
4643 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4644 
4645 	seq_puts(m, "# Snapshot commands:\n");
4646 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4647 		show_snapshot_main_help(m);
4648 	else
4649 		show_snapshot_percpu_help(m);
4650 }
4651 #else
4652 /* Should never be called */
4653 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4654 #endif
4655 
4656 static int s_show(struct seq_file *m, void *v)
4657 {
4658 	struct trace_iterator *iter = v;
4659 	int ret;
4660 
4661 	if (iter->ent == NULL) {
4662 		if (iter->tr) {
4663 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4664 			seq_puts(m, "#\n");
4665 			test_ftrace_alive(m);
4666 		}
4667 		if (iter->snapshot && trace_empty(iter))
4668 			print_snapshot_help(m, iter);
4669 		else if (iter->trace && iter->trace->print_header)
4670 			iter->trace->print_header(m);
4671 		else
4672 			trace_default_header(m);
4673 
4674 	} else if (iter->leftover) {
4675 		/*
4676 		 * If we filled the seq_file buffer earlier, we
4677 		 * want to just show it now.
4678 		 */
4679 		ret = trace_print_seq(m, &iter->seq);
4680 
4681 		/* ret should this time be zero, but you never know */
4682 		iter->leftover = ret;
4683 
4684 	} else {
4685 		ret = print_trace_line(iter);
4686 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4687 			iter->seq.full = 0;
4688 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4689 		}
4690 		ret = trace_print_seq(m, &iter->seq);
4691 		/*
4692 		 * If we overflow the seq_file buffer, then it will
4693 		 * ask us for this data again at start up.
4694 		 * Use that instead.
4695 		 *  ret is 0 if seq_file write succeeded.
4696 		 *        -1 otherwise.
4697 		 */
4698 		iter->leftover = ret;
4699 	}
4700 
4701 	return 0;
4702 }
4703 
4704 /*
4705  * Should be used after trace_array_get(), trace_types_lock
4706  * ensures that i_cdev was already initialized.
4707  */
4708 static inline int tracing_get_cpu(struct inode *inode)
4709 {
4710 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4711 		return (long)inode->i_cdev - 1;
4712 	return RING_BUFFER_ALL_CPUS;
4713 }
4714 
4715 static const struct seq_operations tracer_seq_ops = {
4716 	.start		= s_start,
4717 	.next		= s_next,
4718 	.stop		= s_stop,
4719 	.show		= s_show,
4720 };
4721 
4722 static struct trace_iterator *
4723 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4724 {
4725 	struct trace_array *tr = inode->i_private;
4726 	struct trace_iterator *iter;
4727 	int cpu;
4728 
4729 	if (tracing_disabled)
4730 		return ERR_PTR(-ENODEV);
4731 
4732 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4733 	if (!iter)
4734 		return ERR_PTR(-ENOMEM);
4735 
4736 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4737 				    GFP_KERNEL);
4738 	if (!iter->buffer_iter)
4739 		goto release;
4740 
4741 	/*
4742 	 * trace_find_next_entry() may need to save off iter->ent.
4743 	 * It will place it into the iter->temp buffer. As most
4744 	 * events are less than 128 bytes, allocate a buffer of that size.
4745 	 * If one is greater, then trace_find_next_entry() will
4746 	 * allocate a new buffer to adjust for the bigger iter->ent.
4747 	 * It's not critical if it fails to get allocated here.
4748 	 */
4749 	iter->temp = kmalloc(128, GFP_KERNEL);
4750 	if (iter->temp)
4751 		iter->temp_size = 128;
4752 
4753 	/*
4754 	 * trace_event_printf() may need to modify the given format
4755 	 * string to replace %p with %px so that it shows the real address
4756 	 * instead of a hash value. However, that is only needed for event
4757 	 * tracing; other tracers may not need it. Defer the allocation
4758 	 * until it is needed.
4759 	 */
4760 	iter->fmt = NULL;
4761 	iter->fmt_size = 0;
4762 
4763 	/*
4764 	 * We make a copy of the current tracer to avoid concurrent
4765 	 * changes on it while we are reading.
4766 	 */
4767 	mutex_lock(&trace_types_lock);
4768 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4769 	if (!iter->trace)
4770 		goto fail;
4771 
4772 	*iter->trace = *tr->current_trace;
4773 
4774 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4775 		goto fail;
4776 
4777 	iter->tr = tr;
4778 
4779 #ifdef CONFIG_TRACER_MAX_TRACE
4780 	/* Currently only the top directory has a snapshot */
4781 	if (tr->current_trace->print_max || snapshot)
4782 		iter->array_buffer = &tr->max_buffer;
4783 	else
4784 #endif
4785 		iter->array_buffer = &tr->array_buffer;
4786 	iter->snapshot = snapshot;
4787 	iter->pos = -1;
4788 	iter->cpu_file = tracing_get_cpu(inode);
4789 	mutex_init(&iter->mutex);
4790 
4791 	/* Notify the tracer early; before we stop tracing. */
4792 	if (iter->trace->open)
4793 		iter->trace->open(iter);
4794 
4795 	/* Annotate start of buffers if we had overruns */
4796 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4797 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4798 
4799 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4800 	if (trace_clocks[tr->clock_id].in_ns)
4801 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4802 
4803 	/*
4804 	 * If pause-on-trace is enabled, then stop the trace while
4805 	 * dumping, unless this is the "snapshot" file
4806 	 */
4807 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4808 		tracing_stop_tr(tr);
4809 
4810 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4811 		for_each_tracing_cpu(cpu) {
4812 			iter->buffer_iter[cpu] =
4813 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4814 							 cpu, GFP_KERNEL);
4815 		}
4816 		ring_buffer_read_prepare_sync();
4817 		for_each_tracing_cpu(cpu) {
4818 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4819 			tracing_iter_reset(iter, cpu);
4820 		}
4821 	} else {
4822 		cpu = iter->cpu_file;
4823 		iter->buffer_iter[cpu] =
4824 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4825 						 cpu, GFP_KERNEL);
4826 		ring_buffer_read_prepare_sync();
4827 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4828 		tracing_iter_reset(iter, cpu);
4829 	}
4830 
4831 	mutex_unlock(&trace_types_lock);
4832 
4833 	return iter;
4834 
4835  fail:
4836 	mutex_unlock(&trace_types_lock);
4837 	kfree(iter->trace);
4838 	kfree(iter->temp);
4839 	kfree(iter->buffer_iter);
4840 release:
4841 	seq_release_private(inode, file);
4842 	return ERR_PTR(-ENOMEM);
4843 }
4844 
4845 int tracing_open_generic(struct inode *inode, struct file *filp)
4846 {
4847 	int ret;
4848 
4849 	ret = tracing_check_open_get_tr(NULL);
4850 	if (ret)
4851 		return ret;
4852 
4853 	filp->private_data = inode->i_private;
4854 	return 0;
4855 }
4856 
4857 bool tracing_is_disabled(void)
4858 {
4859 	return (tracing_disabled) ? true : false;
4860 }
4861 
4862 /*
4863  * Open and update trace_array ref count.
4864  * Must have the current trace_array passed to it.
4865  */
4866 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4867 {
4868 	struct trace_array *tr = inode->i_private;
4869 	int ret;
4870 
4871 	ret = tracing_check_open_get_tr(tr);
4872 	if (ret)
4873 		return ret;
4874 
4875 	filp->private_data = inode->i_private;
4876 
4877 	return 0;
4878 }
4879 
4880 /*
4881  * The private pointer of the inode is the trace_event_file.
4882  * Update the tr ref count associated to it.
4883  */
4884 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4885 {
4886 	struct trace_event_file *file = inode->i_private;
4887 	int ret;
4888 
4889 	ret = tracing_check_open_get_tr(file->tr);
4890 	if (ret)
4891 		return ret;
4892 
4893 	filp->private_data = inode->i_private;
4894 
4895 	return 0;
4896 }
4897 
4898 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4899 {
4900 	struct trace_event_file *file = inode->i_private;
4901 
4902 	trace_array_put(file->tr);
4903 
4904 	return 0;
4905 }
4906 
4907 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4908 {
4909 	tracing_release_file_tr(inode, filp);
4910 	return single_release(inode, filp);
4911 }
4912 
4913 static int tracing_mark_open(struct inode *inode, struct file *filp)
4914 {
4915 	stream_open(inode, filp);
4916 	return tracing_open_generic_tr(inode, filp);
4917 }
4918 
4919 static int tracing_release(struct inode *inode, struct file *file)
4920 {
4921 	struct trace_array *tr = inode->i_private;
4922 	struct seq_file *m = file->private_data;
4923 	struct trace_iterator *iter;
4924 	int cpu;
4925 
4926 	if (!(file->f_mode & FMODE_READ)) {
4927 		trace_array_put(tr);
4928 		return 0;
4929 	}
4930 
4931 	/* Writes do not use seq_file */
4932 	iter = m->private;
4933 	mutex_lock(&trace_types_lock);
4934 
4935 	for_each_tracing_cpu(cpu) {
4936 		if (iter->buffer_iter[cpu])
4937 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4938 	}
4939 
4940 	if (iter->trace && iter->trace->close)
4941 		iter->trace->close(iter);
4942 
4943 	if (!iter->snapshot && tr->stop_count)
4944 		/* reenable tracing if it was previously enabled */
4945 		tracing_start_tr(tr);
4946 
4947 	__trace_array_put(tr);
4948 
4949 	mutex_unlock(&trace_types_lock);
4950 
4951 	mutex_destroy(&iter->mutex);
4952 	free_cpumask_var(iter->started);
4953 	kfree(iter->fmt);
4954 	kfree(iter->temp);
4955 	kfree(iter->trace);
4956 	kfree(iter->buffer_iter);
4957 	seq_release_private(inode, file);
4958 
4959 	return 0;
4960 }
4961 
4962 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4963 {
4964 	struct trace_array *tr = inode->i_private;
4965 
4966 	trace_array_put(tr);
4967 	return 0;
4968 }
4969 
4970 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4971 {
4972 	struct trace_array *tr = inode->i_private;
4973 
4974 	trace_array_put(tr);
4975 
4976 	return single_release(inode, file);
4977 }
4978 
4979 static int tracing_open(struct inode *inode, struct file *file)
4980 {
4981 	struct trace_array *tr = inode->i_private;
4982 	struct trace_iterator *iter;
4983 	int ret;
4984 
4985 	ret = tracing_check_open_get_tr(tr);
4986 	if (ret)
4987 		return ret;
4988 
4989 	/* If this file was open for write, then erase contents */
4990 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4991 		int cpu = tracing_get_cpu(inode);
4992 		struct array_buffer *trace_buf = &tr->array_buffer;
4993 
4994 #ifdef CONFIG_TRACER_MAX_TRACE
4995 		if (tr->current_trace->print_max)
4996 			trace_buf = &tr->max_buffer;
4997 #endif
4998 
4999 		if (cpu == RING_BUFFER_ALL_CPUS)
5000 			tracing_reset_online_cpus(trace_buf);
5001 		else
5002 			tracing_reset_cpu(trace_buf, cpu);
5003 	}
5004 
5005 	if (file->f_mode & FMODE_READ) {
5006 		iter = __tracing_open(inode, file, false);
5007 		if (IS_ERR(iter))
5008 			ret = PTR_ERR(iter);
5009 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5010 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
5011 	}
5012 
5013 	if (ret < 0)
5014 		trace_array_put(tr);
5015 
5016 	return ret;
5017 }
5018 
5019 /*
5020  * Some tracers are not suitable for instance buffers.
5021  * A tracer is always available for the global array (toplevel)
5022  * or if it explicitly states that it is.
5023  */
5024 static bool
5025 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5026 {
5027 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5028 }
5029 
5030 /* Find the next tracer that this trace array may use */
5031 static struct tracer *
5032 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5033 {
5034 	while (t && !trace_ok_for_array(t, tr))
5035 		t = t->next;
5036 
5037 	return t;
5038 }
5039 
5040 static void *
5041 t_next(struct seq_file *m, void *v, loff_t *pos)
5042 {
5043 	struct trace_array *tr = m->private;
5044 	struct tracer *t = v;
5045 
5046 	(*pos)++;
5047 
5048 	if (t)
5049 		t = get_tracer_for_array(tr, t->next);
5050 
5051 	return t;
5052 }
5053 
5054 static void *t_start(struct seq_file *m, loff_t *pos)
5055 {
5056 	struct trace_array *tr = m->private;
5057 	struct tracer *t;
5058 	loff_t l = 0;
5059 
5060 	mutex_lock(&trace_types_lock);
5061 
5062 	t = get_tracer_for_array(tr, trace_types);
5063 	for (; t && l < *pos; t = t_next(m, t, &l))
5064 			;
5065 
5066 	return t;
5067 }
5068 
5069 static void t_stop(struct seq_file *m, void *p)
5070 {
5071 	mutex_unlock(&trace_types_lock);
5072 }
5073 
5074 static int t_show(struct seq_file *m, void *v)
5075 {
5076 	struct tracer *t = v;
5077 
5078 	if (!t)
5079 		return 0;
5080 
5081 	seq_puts(m, t->name);
5082 	if (t->next)
5083 		seq_putc(m, ' ');
5084 	else
5085 		seq_putc(m, '\n');
5086 
5087 	return 0;
5088 }
5089 
5090 static const struct seq_operations show_traces_seq_ops = {
5091 	.start		= t_start,
5092 	.next		= t_next,
5093 	.stop		= t_stop,
5094 	.show		= t_show,
5095 };
5096 
5097 static int show_traces_open(struct inode *inode, struct file *file)
5098 {
5099 	struct trace_array *tr = inode->i_private;
5100 	struct seq_file *m;
5101 	int ret;
5102 
5103 	ret = tracing_check_open_get_tr(tr);
5104 	if (ret)
5105 		return ret;
5106 
5107 	ret = seq_open(file, &show_traces_seq_ops);
5108 	if (ret) {
5109 		trace_array_put(tr);
5110 		return ret;
5111 	}
5112 
5113 	m = file->private_data;
5114 	m->private = tr;
5115 
5116 	return 0;
5117 }
5118 
5119 static int show_traces_release(struct inode *inode, struct file *file)
5120 {
5121 	struct trace_array *tr = inode->i_private;
5122 
5123 	trace_array_put(tr);
5124 	return seq_release(inode, file);
5125 }
5126 
5127 static ssize_t
5128 tracing_write_stub(struct file *filp, const char __user *ubuf,
5129 		   size_t count, loff_t *ppos)
5130 {
5131 	return count;
5132 }
5133 
5134 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5135 {
5136 	int ret;
5137 
5138 	if (file->f_mode & FMODE_READ)
5139 		ret = seq_lseek(file, offset, whence);
5140 	else
5141 		file->f_pos = ret = 0;
5142 
5143 	return ret;
5144 }
5145 
5146 static const struct file_operations tracing_fops = {
5147 	.open		= tracing_open,
5148 	.read		= seq_read,
5149 	.read_iter	= seq_read_iter,
5150 	.splice_read	= generic_file_splice_read,
5151 	.write		= tracing_write_stub,
5152 	.llseek		= tracing_lseek,
5153 	.release	= tracing_release,
5154 };
5155 
5156 static const struct file_operations show_traces_fops = {
5157 	.open		= show_traces_open,
5158 	.read		= seq_read,
5159 	.llseek		= seq_lseek,
5160 	.release	= show_traces_release,
5161 };
5162 
5163 static ssize_t
5164 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5165 		     size_t count, loff_t *ppos)
5166 {
5167 	struct trace_array *tr = file_inode(filp)->i_private;
5168 	char *mask_str;
5169 	int len;
5170 
5171 	len = snprintf(NULL, 0, "%*pb\n",
5172 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5173 	mask_str = kmalloc(len, GFP_KERNEL);
5174 	if (!mask_str)
5175 		return -ENOMEM;
5176 
5177 	len = snprintf(mask_str, len, "%*pb\n",
5178 		       cpumask_pr_args(tr->tracing_cpumask));
5179 	if (len >= count) {
5180 		count = -EINVAL;
5181 		goto out_err;
5182 	}
5183 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5184 
5185 out_err:
5186 	kfree(mask_str);
5187 
5188 	return count;
5189 }
5190 
5191 int tracing_set_cpumask(struct trace_array *tr,
5192 			cpumask_var_t tracing_cpumask_new)
5193 {
5194 	int cpu;
5195 
5196 	if (!tr)
5197 		return -EINVAL;
5198 
5199 	local_irq_disable();
5200 	arch_spin_lock(&tr->max_lock);
5201 	for_each_tracing_cpu(cpu) {
5202 		/*
5203 		 * Increase/decrease the disabled counter if we are
5204 		 * about to flip a bit in the cpumask:
5205 		 */
5206 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5207 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5208 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5209 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5210 #ifdef CONFIG_TRACER_MAX_TRACE
5211 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5212 #endif
5213 		}
5214 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5215 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5216 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5217 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5218 #ifdef CONFIG_TRACER_MAX_TRACE
5219 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5220 #endif
5221 		}
5222 	}
5223 	arch_spin_unlock(&tr->max_lock);
5224 	local_irq_enable();
5225 
5226 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5227 
5228 	return 0;
5229 }
5230 
5231 static ssize_t
5232 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5233 		      size_t count, loff_t *ppos)
5234 {
5235 	struct trace_array *tr = file_inode(filp)->i_private;
5236 	cpumask_var_t tracing_cpumask_new;
5237 	int err;
5238 
5239 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5240 		return -ENOMEM;
5241 
5242 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5243 	if (err)
5244 		goto err_free;
5245 
5246 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5247 	if (err)
5248 		goto err_free;
5249 
5250 	free_cpumask_var(tracing_cpumask_new);
5251 
5252 	return count;
5253 
5254 err_free:
5255 	free_cpumask_var(tracing_cpumask_new);
5256 
5257 	return err;
5258 }
5259 
5260 static const struct file_operations tracing_cpumask_fops = {
5261 	.open		= tracing_open_generic_tr,
5262 	.read		= tracing_cpumask_read,
5263 	.write		= tracing_cpumask_write,
5264 	.release	= tracing_release_generic_tr,
5265 	.llseek		= generic_file_llseek,
5266 };
5267 
5268 static int tracing_trace_options_show(struct seq_file *m, void *v)
5269 {
5270 	struct tracer_opt *trace_opts;
5271 	struct trace_array *tr = m->private;
5272 	u32 tracer_flags;
5273 	int i;
5274 
5275 	mutex_lock(&trace_types_lock);
5276 	tracer_flags = tr->current_trace->flags->val;
5277 	trace_opts = tr->current_trace->flags->opts;
5278 
5279 	for (i = 0; trace_options[i]; i++) {
5280 		if (tr->trace_flags & (1 << i))
5281 			seq_printf(m, "%s\n", trace_options[i]);
5282 		else
5283 			seq_printf(m, "no%s\n", trace_options[i]);
5284 	}
5285 
5286 	for (i = 0; trace_opts[i].name; i++) {
5287 		if (tracer_flags & trace_opts[i].bit)
5288 			seq_printf(m, "%s\n", trace_opts[i].name);
5289 		else
5290 			seq_printf(m, "no%s\n", trace_opts[i].name);
5291 	}
5292 	mutex_unlock(&trace_types_lock);
5293 
5294 	return 0;
5295 }
5296 
5297 static int __set_tracer_option(struct trace_array *tr,
5298 			       struct tracer_flags *tracer_flags,
5299 			       struct tracer_opt *opts, int neg)
5300 {
5301 	struct tracer *trace = tracer_flags->trace;
5302 	int ret;
5303 
5304 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5305 	if (ret)
5306 		return ret;
5307 
5308 	if (neg)
5309 		tracer_flags->val &= ~opts->bit;
5310 	else
5311 		tracer_flags->val |= opts->bit;
5312 	return 0;
5313 }
5314 
5315 /* Try to assign a tracer specific option */
5316 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5317 {
5318 	struct tracer *trace = tr->current_trace;
5319 	struct tracer_flags *tracer_flags = trace->flags;
5320 	struct tracer_opt *opts = NULL;
5321 	int i;
5322 
5323 	for (i = 0; tracer_flags->opts[i].name; i++) {
5324 		opts = &tracer_flags->opts[i];
5325 
5326 		if (strcmp(cmp, opts->name) == 0)
5327 			return __set_tracer_option(tr, trace->flags, opts, neg);
5328 	}
5329 
5330 	return -EINVAL;
5331 }
5332 
5333 /* Some tracers require overwrite to stay enabled */
5334 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5335 {
5336 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5337 		return -1;
5338 
5339 	return 0;
5340 }
5341 
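/*
 * Set or clear one of the core trace option flags.  The current tracer may
 * veto the change via ->flag_changed(); otherwise the flag is updated and
 * any side effects (cmdline/tgid recording, fork following, ring buffer
 * overwrite mode, trace_printk) are applied.
 */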
5342 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5343 {
5344 	int *map;
5345 
5346 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5347 	    (mask == TRACE_ITER_RECORD_CMD))
5348 		lockdep_assert_held(&event_mutex);
5349 
5350 	/* do nothing if flag is already set */
5351 	if (!!(tr->trace_flags & mask) == !!enabled)
5352 		return 0;
5353 
5354 	/* Give the tracer a chance to approve the change */
5355 	if (tr->current_trace->flag_changed)
5356 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5357 			return -EINVAL;
5358 
5359 	if (enabled)
5360 		tr->trace_flags |= mask;
5361 	else
5362 		tr->trace_flags &= ~mask;
5363 
5364 	if (mask == TRACE_ITER_RECORD_CMD)
5365 		trace_event_enable_cmd_record(enabled);
5366 
5367 	if (mask == TRACE_ITER_RECORD_TGID) {
5368 		if (!tgid_map) {
5369 			tgid_map_max = pid_max;
5370 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5371 				       GFP_KERNEL);
5372 
5373 			/*
5374 			 * Pairs with smp_load_acquire() in
5375 			 * trace_find_tgid_ptr() to ensure that if it observes
5376 			 * the tgid_map we just allocated then it also observes
5377 			 * the corresponding tgid_map_max value.
5378 			 */
5379 			smp_store_release(&tgid_map, map);
5380 		}
5381 		if (!tgid_map) {
5382 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5383 			return -ENOMEM;
5384 		}
5385 
5386 		trace_event_enable_tgid_record(enabled);
5387 	}
5388 
5389 	if (mask == TRACE_ITER_EVENT_FORK)
5390 		trace_event_follow_fork(tr, enabled);
5391 
5392 	if (mask == TRACE_ITER_FUNC_FORK)
5393 		ftrace_pid_follow_fork(tr, enabled);
5394 
5395 	if (mask == TRACE_ITER_OVERWRITE) {
5396 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5397 #ifdef CONFIG_TRACER_MAX_TRACE
5398 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5399 #endif
5400 	}
5401 
5402 	if (mask == TRACE_ITER_PRINTK) {
5403 		trace_printk_start_stop_comm(enabled);
5404 		trace_printk_control(enabled);
5405 	}
5406 
5407 	return 0;
5408 }
5409 
5410 int trace_set_options(struct trace_array *tr, char *option)
5411 {
5412 	char *cmp;
5413 	int neg = 0;
5414 	int ret;
5415 	size_t orig_len = strlen(option);
5416 	int len;
5417 
5418 	cmp = strstrip(option);
5419 
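	/* A leading "no" negates the option, e.g. "noprint-parent" clears print-parent. */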
5420 	len = str_has_prefix(cmp, "no");
5421 	if (len)
5422 		neg = 1;
5423 
5424 	cmp += len;
5425 
5426 	mutex_lock(&event_mutex);
5427 	mutex_lock(&trace_types_lock);
5428 
5429 	ret = match_string(trace_options, -1, cmp);
5430 	/* If no option could be set, test the specific tracer options */
5431 	if (ret < 0)
5432 		ret = set_tracer_option(tr, cmp, neg);
5433 	else
5434 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5435 
5436 	mutex_unlock(&trace_types_lock);
5437 	mutex_unlock(&event_mutex);
5438 
5439 	/*
5440 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5441 	 * turn it back into a space.
5442 	 */
5443 	if (orig_len > strlen(option))
5444 		option[strlen(option)] = ' ';
5445 
5446 	return ret;
5447 }
5448 
5449 static void __init apply_trace_boot_options(void)
5450 {
5451 	char *buf = trace_boot_options_buf;
5452 	char *option;
5453 
5454 	while (true) {
5455 		option = strsep(&buf, ",");
5456 
5457 		if (!option)
5458 			break;
5459 
5460 		if (*option)
5461 			trace_set_options(&global_trace, option);
5462 
5463 		/* Put back the comma to allow this to be called again */
5464 		if (buf)
5465 			*(buf - 1) = ',';
5466 	}
5467 }
5468 
5469 static ssize_t
5470 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5471 			size_t cnt, loff_t *ppos)
5472 {
5473 	struct seq_file *m = filp->private_data;
5474 	struct trace_array *tr = m->private;
5475 	char buf[64];
5476 	int ret;
5477 
5478 	if (cnt >= sizeof(buf))
5479 		return -EINVAL;
5480 
5481 	if (copy_from_user(buf, ubuf, cnt))
5482 		return -EFAULT;
5483 
5484 	buf[cnt] = 0;
5485 
5486 	ret = trace_set_options(tr, buf);
5487 	if (ret < 0)
5488 		return ret;
5489 
5490 	*ppos += cnt;
5491 
5492 	return cnt;
5493 }
5494 
5495 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5496 {
5497 	struct trace_array *tr = inode->i_private;
5498 	int ret;
5499 
5500 	ret = tracing_check_open_get_tr(tr);
5501 	if (ret)
5502 		return ret;
5503 
5504 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5505 	if (ret < 0)
5506 		trace_array_put(tr);
5507 
5508 	return ret;
5509 }
5510 
5511 static const struct file_operations tracing_iter_fops = {
5512 	.open		= tracing_trace_options_open,
5513 	.read		= seq_read,
5514 	.llseek		= seq_lseek,
5515 	.release	= tracing_single_release_tr,
5516 	.write		= tracing_trace_options_write,
5517 };
5518 
5519 static const char readme_msg[] =
5520 	"tracing mini-HOWTO:\n\n"
5521 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5522 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5523 	" Important files:\n"
5524 	"  trace\t\t\t- The static contents of the buffer\n"
5525 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5526 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5527 	"  current_tracer\t- function and latency tracers\n"
5528 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5529 	"  error_log\t- error log for failed commands (that support it)\n"
5530 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5531 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5532 	"  trace_clock\t\t- change the clock used to order events\n"
5533 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5534 	"      global:   Synced across CPUs but slows tracing down.\n"
5535 	"     counter:   Not a clock, but just an increment\n"
5536 	"      uptime:   Jiffy counter from time of boot\n"
5537 	"        perf:   Same clock that perf events use\n"
5538 #ifdef CONFIG_X86_64
5539 	"     x86-tsc:   TSC cycle counter\n"
5540 #endif
5541 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5542 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5543 	"    absolute:   Absolute (standalone) timestamp\n"
5544 	"\n  trace_marker\t\t- Writes into this file go into the kernel buffer\n"
5545 	"\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
5546 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5547 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5548 	"\t\t\t  Remove sub-buffer with rmdir\n"
5549 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5550 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5551 	"\t\t\t  option name\n"
5552 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5553 #ifdef CONFIG_DYNAMIC_FTRACE
5554 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5555 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5556 	"\t\t\t  functions\n"
5557 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5558 	"\t     modules: Can select a group via module\n"
5559 	"\t      Format: :mod:<module-name>\n"
5560 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5561 	"\t    triggers: a command to perform when function is hit\n"
5562 	"\t      Format: <function>:<trigger>[:count]\n"
5563 	"\t     trigger: traceon, traceoff\n"
5564 	"\t\t      enable_event:<system>:<event>\n"
5565 	"\t\t      disable_event:<system>:<event>\n"
5566 #ifdef CONFIG_STACKTRACE
5567 	"\t\t      stacktrace\n"
5568 #endif
5569 #ifdef CONFIG_TRACER_SNAPSHOT
5570 	"\t\t      snapshot\n"
5571 #endif
5572 	"\t\t      dump\n"
5573 	"\t\t      cpudump\n"
5574 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5575 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5576 	"\t     The first one will disable tracing every time do_fault is hit\n"
5577 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5578 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5579 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5580 	"\t       the counter will not decrement. It only decrements when the\n"
5581 	"\t       trigger did work\n"
5582 	"\t     To remove trigger without count:\n"
5583 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5584 	"\t     To remove trigger with a count:\n"
5585 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5586 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5587 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5588 	"\t    modules: Can select a group via module command :mod:\n"
5589 	"\t    Does not accept triggers\n"
5590 #endif /* CONFIG_DYNAMIC_FTRACE */
5591 #ifdef CONFIG_FUNCTION_TRACER
5592 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5593 	"\t\t    (function)\n"
5594 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5595 	"\t\t    (function)\n"
5596 #endif
5597 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5598 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5599 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5600 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5601 #endif
5602 #ifdef CONFIG_TRACER_SNAPSHOT
5603 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5604 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5605 	"\t\t\t  information\n"
5606 #endif
5607 #ifdef CONFIG_STACK_TRACER
5608 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5609 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5610 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5611 	"\t\t\t  new trace)\n"
5612 #ifdef CONFIG_DYNAMIC_FTRACE
5613 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5614 	"\t\t\t  traces\n"
5615 #endif
5616 #endif /* CONFIG_STACK_TRACER */
5617 #ifdef CONFIG_DYNAMIC_EVENTS
5618 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5619 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5620 #endif
5621 #ifdef CONFIG_KPROBE_EVENTS
5622 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5623 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5624 #endif
5625 #ifdef CONFIG_UPROBE_EVENTS
5626 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5627 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5628 #endif
5629 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5630 	"\t  accepts: event-definitions (one definition per line)\n"
5631 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5632 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5633 #ifdef CONFIG_HIST_TRIGGERS
5634 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5635 #endif
5636 	"\t           e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n"
5637 	"\t           -:[<group>/]<event>\n"
5638 #ifdef CONFIG_KPROBE_EVENTS
5639 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5640 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5641 #endif
5642 #ifdef CONFIG_UPROBE_EVENTS
5643 	"\t    place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5644 #endif
5645 	"\t     args: <name>=fetcharg[:type]\n"
5646 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5647 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5648 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5649 #else
5650 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5651 #endif
5652 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5653 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5654 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5655 	"\t           symstr, <type>\\[<array-size>\\]\n"
5656 #ifdef CONFIG_HIST_TRIGGERS
5657 	"\t    field: <stype> <name>;\n"
5658 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5659 	"\t           [unsigned] char/int/long\n"
5660 #endif
5661 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5662 	"\t            of the <attached-group>/<attached-event>.\n"
5663 #endif
5664 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5665 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5666 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5667 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5668 	"\t\t\t  events\n"
5669 	"      filter\t\t- If set, only events passing filter are traced\n"
5670 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5671 	"\t\t\t  <event>:\n"
5672 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5673 	"      filter\t\t- If set, only events passing filter are traced\n"
5674 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5675 	"\t    Format: <trigger>[:count][if <filter>]\n"
5676 	"\t   trigger: traceon, traceoff\n"
5677 	"\t            enable_event:<system>:<event>\n"
5678 	"\t            disable_event:<system>:<event>\n"
5679 #ifdef CONFIG_HIST_TRIGGERS
5680 	"\t            enable_hist:<system>:<event>\n"
5681 	"\t            disable_hist:<system>:<event>\n"
5682 #endif
5683 #ifdef CONFIG_STACKTRACE
5684 	"\t\t    stacktrace\n"
5685 #endif
5686 #ifdef CONFIG_TRACER_SNAPSHOT
5687 	"\t\t    snapshot\n"
5688 #endif
5689 #ifdef CONFIG_HIST_TRIGGERS
5690 	"\t\t    hist (see below)\n"
5691 #endif
5692 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5693 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5694 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5695 	"\t                  events/block/block_unplug/trigger\n"
5696 	"\t   The first disables tracing every time block_unplug is hit.\n"
5697 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5698 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5699 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5700 	"\t   Like function triggers, the counter is only decremented if it\n"
5701 	"\t    enabled or disabled tracing.\n"
5702 	"\t   To remove a trigger without a count:\n"
5703 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5704 	"\t   To remove a trigger with a count:\n"
5705 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5706 	"\t   Filters can be ignored when removing a trigger.\n"
5707 #ifdef CONFIG_HIST_TRIGGERS
5708 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5709 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5710 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5711 	"\t            [:values=<field1[,field2,...]>]\n"
5712 	"\t            [:sort=<field1[,field2,...]>]\n"
5713 	"\t            [:size=#entries]\n"
5714 	"\t            [:pause][:continue][:clear]\n"
5715 	"\t            [:name=histname1]\n"
5716 	"\t            [:<handler>.<action>]\n"
5717 	"\t            [if <filter>]\n\n"
5718 	"\t    Note, special fields can be used as well:\n"
5719 	"\t            common_timestamp - to record current timestamp\n"
5720 	"\t            common_cpu - to record the CPU the event happened on\n"
5721 	"\n"
5722 	"\t    A hist trigger variable can be:\n"
5723 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5724 	"\t        - a reference to another variable e.g. y=$x,\n"
5725 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5726 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5727 	"\n"
5728 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5729 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5730 	"\t    variable reference, field or numeric literal.\n"
5731 	"\n"
5732 	"\t    When a matching event is hit, an entry is added to a hash\n"
5733 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5734 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5735 	"\t    correspond to fields in the event's format description.  Keys\n"
5736 	"\t    can be any field, or the special string 'stacktrace'.\n"
5737 	"\t    Compound keys consisting of up to two fields can be specified\n"
5738 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5739 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5740 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5741 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5742 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5743 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5744 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5745 	"\t    its histogram data will be shared with other triggers of the\n"
5746 	"\t    same name, and trigger hits will update this common data.\n\n"
5747 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5748 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5749 	"\t    triggers attached to an event, there will be a table for each\n"
5750 	"\t    trigger in the output.  The table displayed for a named\n"
5751 	"\t    trigger will be the same as any other instance having the\n"
5752 	"\t    same name.  The default format used to display a given field\n"
5753 	"\t    can be modified by appending any of the following modifiers\n"
5754 	"\t    to the field name, as applicable:\n\n"
5755 	"\t            .hex        display a number as a hex value\n"
5756 	"\t            .sym        display an address as a symbol\n"
5757 	"\t            .sym-offset display an address as a symbol and offset\n"
5758 	"\t            .execname   display a common_pid as a program name\n"
5759 	"\t            .syscall    display a syscall id as a syscall name\n"
5760 	"\t            .log2       display log2 value rather than raw number\n"
5761 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5762 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5763 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5764 	"\t    trigger or to start a hist trigger but not log any events\n"
5765 	"\t    until told to do so.  'continue' can be used to start or\n"
5766 	"\t    restart a paused hist trigger.\n\n"
5767 	"\t    The 'clear' parameter will clear the contents of a running\n"
5768 	"\t    hist trigger and leave its current paused/active state\n"
5769 	"\t    unchanged.\n\n"
5770 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5771 	"\t    have one event conditionally start and stop another event's\n"
5772 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5773 	"\t    the enable_event and disable_event triggers.\n\n"
5774 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5775 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5776 	"\t        <handler>.<action>\n\n"
5777 	"\t    The available handlers are:\n\n"
5778 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5779 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5780 	"\t        onchange(var)            - invoke action if var changes\n\n"
5781 	"\t    The available actions are:\n\n"
5782 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5783 	"\t        save(field,...)                      - save current event fields\n"
5784 #ifdef CONFIG_TRACER_SNAPSHOT
5785 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5786 #endif
5787 #ifdef CONFIG_SYNTH_EVENTS
5788 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5789 	"\t  Write into this file to define/undefine new synthetic events.\n"
5790 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5791 #endif
5792 #endif
5793 ;
5794 
5795 static ssize_t
5796 tracing_readme_read(struct file *filp, char __user *ubuf,
5797 		       size_t cnt, loff_t *ppos)
5798 {
5799 	return simple_read_from_buffer(ubuf, cnt, ppos,
5800 					readme_msg, strlen(readme_msg));
5801 }
5802 
5803 static const struct file_operations tracing_readme_fops = {
5804 	.open		= tracing_open_generic,
5805 	.read		= tracing_readme_read,
5806 	.llseek		= generic_file_llseek,
5807 };
5808 
5809 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5810 {
5811 	int pid = ++(*pos);
5812 
5813 	return trace_find_tgid_ptr(pid);
5814 }
5815 
5816 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5817 {
5818 	int pid = *pos;
5819 
5820 	return trace_find_tgid_ptr(pid);
5821 }
5822 
5823 static void saved_tgids_stop(struct seq_file *m, void *v)
5824 {
5825 }
5826 
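/*
 * tgid_map is indexed by pid, so the pid is recovered from the entry's
 * offset within the array; empty (zero) entries are skipped.
 */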
5827 static int saved_tgids_show(struct seq_file *m, void *v)
5828 {
5829 	int *entry = (int *)v;
5830 	int pid = entry - tgid_map;
5831 	int tgid = *entry;
5832 
5833 	if (tgid == 0)
5834 		return SEQ_SKIP;
5835 
5836 	seq_printf(m, "%d %d\n", pid, tgid);
5837 	return 0;
5838 }
5839 
5840 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5841 	.start		= saved_tgids_start,
5842 	.stop		= saved_tgids_stop,
5843 	.next		= saved_tgids_next,
5844 	.show		= saved_tgids_show,
5845 };
5846 
5847 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5848 {
5849 	int ret;
5850 
5851 	ret = tracing_check_open_get_tr(NULL);
5852 	if (ret)
5853 		return ret;
5854 
5855 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5856 }
5857 
5858 
5859 static const struct file_operations tracing_saved_tgids_fops = {
5860 	.open		= tracing_saved_tgids_open,
5861 	.read		= seq_read,
5862 	.llseek		= seq_lseek,
5863 	.release	= seq_release,
5864 };
5865 
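/*
 * Walk the map_cmdline_to_pid array, skipping slots that never had a
 * command recorded (NO_CMDLINE_MAP).
 */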
5866 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5867 {
5868 	unsigned int *ptr = v;
5869 
5870 	if (*pos || m->count)
5871 		ptr++;
5872 
5873 	(*pos)++;
5874 
5875 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5876 	     ptr++) {
5877 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5878 			continue;
5879 
5880 		return ptr;
5881 	}
5882 
5883 	return NULL;
5884 }
5885 
5886 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5887 {
5888 	void *v;
5889 	loff_t l = 0;
5890 
5891 	preempt_disable();
5892 	arch_spin_lock(&trace_cmdline_lock);
5893 
5894 	v = &savedcmd->map_cmdline_to_pid[0];
5895 	while (l <= *pos) {
5896 		v = saved_cmdlines_next(m, v, &l);
5897 		if (!v)
5898 			return NULL;
5899 	}
5900 
5901 	return v;
5902 }
5903 
5904 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5905 {
5906 	arch_spin_unlock(&trace_cmdline_lock);
5907 	preempt_enable();
5908 }
5909 
5910 static int saved_cmdlines_show(struct seq_file *m, void *v)
5911 {
5912 	char buf[TASK_COMM_LEN];
5913 	unsigned int *pid = v;
5914 
5915 	__trace_find_cmdline(*pid, buf);
5916 	seq_printf(m, "%d %s\n", *pid, buf);
5917 	return 0;
5918 }
5919 
5920 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5921 	.start		= saved_cmdlines_start,
5922 	.next		= saved_cmdlines_next,
5923 	.stop		= saved_cmdlines_stop,
5924 	.show		= saved_cmdlines_show,
5925 };
5926 
5927 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5928 {
5929 	int ret;
5930 
5931 	ret = tracing_check_open_get_tr(NULL);
5932 	if (ret)
5933 		return ret;
5934 
5935 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5936 }
5937 
5938 static const struct file_operations tracing_saved_cmdlines_fops = {
5939 	.open		= tracing_saved_cmdlines_open,
5940 	.read		= seq_read,
5941 	.llseek		= seq_lseek,
5942 	.release	= seq_release,
5943 };
5944 
5945 static ssize_t
5946 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5947 				 size_t cnt, loff_t *ppos)
5948 {
5949 	char buf[64];
5950 	int r;
5951 
5952 	preempt_disable();
5953 	arch_spin_lock(&trace_cmdline_lock);
5954 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5955 	arch_spin_unlock(&trace_cmdline_lock);
5956 	preempt_enable();
5957 
5958 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5959 }
5960 
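/*
 * Allocate a replacement cmdline buffer of @val entries, swap it in under
 * trace_cmdline_lock, and free the old buffer after dropping the lock.
 */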
5961 static int tracing_resize_saved_cmdlines(unsigned int val)
5962 {
5963 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5964 
5965 	s = allocate_cmdlines_buffer(val);
5966 	if (!s)
5967 		return -ENOMEM;
5968 
5969 	preempt_disable();
5970 	arch_spin_lock(&trace_cmdline_lock);
5971 	savedcmd_temp = savedcmd;
5972 	savedcmd = s;
5973 	arch_spin_unlock(&trace_cmdline_lock);
5974 	preempt_enable();
5975 	free_saved_cmdlines_buffer(savedcmd_temp);
5976 
5977 	return 0;
5978 }
5979 
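/* Writing a count resizes the saved comm-pid cache, e.g.: echo 1024 > saved_cmdlines_size */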
5980 static ssize_t
5981 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5982 				  size_t cnt, loff_t *ppos)
5983 {
5984 	unsigned long val;
5985 	int ret;
5986 
5987 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5988 	if (ret)
5989 		return ret;
5990 
5991 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5992 	if (!val || val > PID_MAX_DEFAULT)
5993 		return -EINVAL;
5994 
5995 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5996 	if (ret < 0)
5997 		return ret;
5998 
5999 	*ppos += cnt;
6000 
6001 	return cnt;
6002 }
6003 
6004 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6005 	.open		= tracing_open_generic,
6006 	.read		= tracing_saved_cmdlines_size_read,
6007 	.write		= tracing_saved_cmdlines_size_write,
6008 };
6009 
6010 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
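/*
 * If @ptr has landed on a tail entry (no eval_string), follow the link to
 * the next map array and step past its head entry; return NULL at the end
 * of the chain.
 */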
6011 static union trace_eval_map_item *
6012 update_eval_map(union trace_eval_map_item *ptr)
6013 {
6014 	if (!ptr->map.eval_string) {
6015 		if (ptr->tail.next) {
6016 			ptr = ptr->tail.next;
6017 			/* Set ptr to the next real item (skip head) */
6018 			ptr++;
6019 		} else
6020 			return NULL;
6021 	}
6022 	return ptr;
6023 }
6024 
6025 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6026 {
6027 	union trace_eval_map_item *ptr = v;
6028 
6029 	/*
6030 	 * Paranoid! If ptr points to end, we don't want to increment past it.
6031 	 * This really should never happen.
6032 	 */
6033 	(*pos)++;
6034 	ptr = update_eval_map(ptr);
6035 	if (WARN_ON_ONCE(!ptr))
6036 		return NULL;
6037 
6038 	ptr++;
6039 	ptr = update_eval_map(ptr);
6040 
6041 	return ptr;
6042 }
6043 
6044 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6045 {
6046 	union trace_eval_map_item *v;
6047 	loff_t l = 0;
6048 
6049 	mutex_lock(&trace_eval_mutex);
6050 
6051 	v = trace_eval_maps;
6052 	if (v)
6053 		v++;
6054 
6055 	while (v && l < *pos) {
6056 		v = eval_map_next(m, v, &l);
6057 	}
6058 
6059 	return v;
6060 }
6061 
6062 static void eval_map_stop(struct seq_file *m, void *v)
6063 {
6064 	mutex_unlock(&trace_eval_mutex);
6065 }
6066 
6067 static int eval_map_show(struct seq_file *m, void *v)
6068 {
6069 	union trace_eval_map_item *ptr = v;
6070 
6071 	seq_printf(m, "%s %ld (%s)\n",
6072 		   ptr->map.eval_string, ptr->map.eval_value,
6073 		   ptr->map.system);
6074 
6075 	return 0;
6076 }
6077 
6078 static const struct seq_operations tracing_eval_map_seq_ops = {
6079 	.start		= eval_map_start,
6080 	.next		= eval_map_next,
6081 	.stop		= eval_map_stop,
6082 	.show		= eval_map_show,
6083 };
6084 
6085 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6086 {
6087 	int ret;
6088 
6089 	ret = tracing_check_open_get_tr(NULL);
6090 	if (ret)
6091 		return ret;
6092 
6093 	return seq_open(filp, &tracing_eval_map_seq_ops);
6094 }
6095 
6096 static const struct file_operations tracing_eval_map_fops = {
6097 	.open		= tracing_eval_map_open,
6098 	.read		= seq_read,
6099 	.llseek		= seq_lseek,
6100 	.release	= seq_release,
6101 };
6102 
6103 static inline union trace_eval_map_item *
6104 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6105 {
6106 	/* Return tail of array given the head */
6107 	return ptr + ptr->head.length + 1;
6108 }
6109 
6110 static void
6111 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6112 			   int len)
6113 {
6114 	struct trace_eval_map **stop;
6115 	struct trace_eval_map **map;
6116 	union trace_eval_map_item *map_array;
6117 	union trace_eval_map_item *ptr;
6118 
6119 	stop = start + len;
6120 
6121 	/*
6122 	 * The trace_eval_maps contains the map plus a head and tail item,
6123 	 * where the head holds the module and length of array, and the
6124 	 * tail holds a pointer to the next list.
6125 	 */
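	/*
	 * Resulting layout of map_array (len + 2 entries):
	 *   [ head | map 0 | map 1 | ... | map len-1 | tail ]
	 * The tail is zeroed below and later points to the next array added.
	 */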
6126 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6127 	if (!map_array) {
6128 		pr_warn("Unable to allocate trace eval mapping\n");
6129 		return;
6130 	}
6131 
6132 	mutex_lock(&trace_eval_mutex);
6133 
6134 	if (!trace_eval_maps)
6135 		trace_eval_maps = map_array;
6136 	else {
6137 		ptr = trace_eval_maps;
6138 		for (;;) {
6139 			ptr = trace_eval_jmp_to_tail(ptr);
6140 			if (!ptr->tail.next)
6141 				break;
6142 			ptr = ptr->tail.next;
6143 
6144 		}
6145 		ptr->tail.next = map_array;
6146 	}
6147 	map_array->head.mod = mod;
6148 	map_array->head.length = len;
6149 	map_array++;
6150 
6151 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6152 		map_array->map = **map;
6153 		map_array++;
6154 	}
6155 	memset(map_array, 0, sizeof(*map_array));
6156 
6157 	mutex_unlock(&trace_eval_mutex);
6158 }
6159 
6160 static void trace_create_eval_file(struct dentry *d_tracer)
6161 {
6162 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6163 			  NULL, &tracing_eval_map_fops);
6164 }
6165 
6166 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6167 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6168 static inline void trace_insert_eval_map_file(struct module *mod,
6169 			      struct trace_eval_map **start, int len) { }
6170 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6171 
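/*
 * Hand the eval (enum) maps to the trace events that use them and, when
 * CONFIG_TRACE_EVAL_MAP_FILE is enabled, also publish them through the
 * eval_map file.
 */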
6172 static void trace_insert_eval_map(struct module *mod,
6173 				  struct trace_eval_map **start, int len)
6174 {
6175 	struct trace_eval_map **map;
6176 
6177 	if (len <= 0)
6178 		return;
6179 
6180 	map = start;
6181 
6182 	trace_event_eval_update(map, len);
6183 
6184 	trace_insert_eval_map_file(mod, start, len);
6185 }
6186 
6187 static ssize_t
6188 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6189 		       size_t cnt, loff_t *ppos)
6190 {
6191 	struct trace_array *tr = filp->private_data;
6192 	char buf[MAX_TRACER_SIZE+2];
6193 	int r;
6194 
6195 	mutex_lock(&trace_types_lock);
6196 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6197 	mutex_unlock(&trace_types_lock);
6198 
6199 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6200 }
6201 
6202 int tracer_init(struct tracer *t, struct trace_array *tr)
6203 {
6204 	tracing_reset_online_cpus(&tr->array_buffer);
6205 	return t->init(tr);
6206 }
6207 
6208 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6209 {
6210 	int cpu;
6211 
6212 	for_each_tracing_cpu(cpu)
6213 		per_cpu_ptr(buf->data, cpu)->entries = val;
6214 }
6215 
6216 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6217 {
6218 	if (cpu == RING_BUFFER_ALL_CPUS) {
6219 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6220 	} else {
6221 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6222 	}
6223 }
6224 
6225 #ifdef CONFIG_TRACER_MAX_TRACE
6226 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6227 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6228 					struct array_buffer *size_buf, int cpu_id)
6229 {
6230 	int cpu, ret = 0;
6231 
6232 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6233 		for_each_tracing_cpu(cpu) {
6234 			ret = ring_buffer_resize(trace_buf->buffer,
6235 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6236 			if (ret < 0)
6237 				break;
6238 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6239 				per_cpu_ptr(size_buf->data, cpu)->entries;
6240 		}
6241 	} else {
6242 		ret = ring_buffer_resize(trace_buf->buffer,
6243 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6244 		if (ret == 0)
6245 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6246 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6247 	}
6248 
6249 	return ret;
6250 }
6251 #endif /* CONFIG_TRACER_MAX_TRACE */
6252 
6253 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6254 					unsigned long size, int cpu)
6255 {
6256 	int ret;
6257 
6258 	/*
6259 	 * If kernel or user changes the size of the ring buffer
6260 	 * we use the size that was given, and we can forget about
6261 	 * expanding it later.
6262 	 */
6263 	ring_buffer_expanded = true;
6264 
6265 	/* May be called before buffers are initialized */
6266 	if (!tr->array_buffer.buffer)
6267 		return 0;
6268 
6269 	/* Do not allow tracing while resizing ring buffer */
6270 	tracing_stop_tr(tr);
6271 
6272 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6273 	if (ret < 0)
6274 		goto out_start;
6275 
6276 #ifdef CONFIG_TRACER_MAX_TRACE
6277 	if (!tr->allocated_snapshot)
6278 		goto out;
6279 
6280 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6281 	if (ret < 0) {
6282 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6283 						     &tr->array_buffer, cpu);
6284 		if (r < 0) {
6285 			/*
6286 			 * AARGH! We are left with different
6287 			 * size max buffer!!!!
6288 			 * The max buffer is our "snapshot" buffer.
6289 			 * When a tracer needs a snapshot (one of the
6290 			 * latency tracers), it swaps the max buffer
6291 			 * with the saved snap shot. We succeeded to
6292 			 * with the saved snapshot. We succeeded in
6293 			 * updating the size of the main buffer, but failed to
6294 			 * to reset the main buffer to the original size, we
6295 			 * failed there too. This is very unlikely to
6296 			 * happen, but if it does, warn and kill all
6297 			 * tracing.
6298 			 */
6299 			WARN_ON(1);
6300 			tracing_disabled = 1;
6301 		}
6302 		goto out_start;
6303 	}
6304 
6305 	update_buffer_entries(&tr->max_buffer, cpu);
6306 
6307  out:
6308 #endif /* CONFIG_TRACER_MAX_TRACE */
6309 
6310 	update_buffer_entries(&tr->array_buffer, cpu);
6311  out_start:
6312 	tracing_start_tr(tr);
6313 	return ret;
6314 }
6315 
6316 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6317 				  unsigned long size, int cpu_id)
6318 {
6319 	int ret;
6320 
6321 	mutex_lock(&trace_types_lock);
6322 
6323 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6324 		/* make sure this CPU is enabled in the mask */
6325 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6326 			ret = -EINVAL;
6327 			goto out;
6328 		}
6329 	}
6330 
6331 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6332 	if (ret < 0)
6333 		ret = -ENOMEM;
6334 
6335 out:
6336 	mutex_unlock(&trace_types_lock);
6337 
6338 	return ret;
6339 }
6340 
6341 
6342 /**
6343  * tracing_update_buffers - used by tracing facility to expand ring buffers
6344  *
6345  * To save on memory when tracing is never used on a system with it
6346  * configured in, the ring buffers are set to a minimum size. But once
6347  * a user starts to use the tracing facility, then they need to grow
6348  * to their default size.
6349  *
6350  * This function is to be called when a tracer is about to be used.
6351  */
6352 int tracing_update_buffers(void)
6353 {
6354 	int ret = 0;
6355 
6356 	mutex_lock(&trace_types_lock);
6357 	if (!ring_buffer_expanded)
6358 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6359 						RING_BUFFER_ALL_CPUS);
6360 	mutex_unlock(&trace_types_lock);
6361 
6362 	return ret;
6363 }
6364 
6365 struct trace_option_dentry;
6366 
6367 static void
6368 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6369 
6370 /*
6371  * Used to clear out the tracer before deletion of an instance.
6372  * Must have trace_types_lock held.
6373  */
6374 static void tracing_set_nop(struct trace_array *tr)
6375 {
6376 	if (tr->current_trace == &nop_trace)
6377 		return;
6378 
6379 	tr->current_trace->enabled--;
6380 
6381 	if (tr->current_trace->reset)
6382 		tr->current_trace->reset(tr);
6383 
6384 	tr->current_trace = &nop_trace;
6385 }
6386 
6387 static bool tracer_options_updated;
6388 
6389 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6390 {
6391 	/* Only enable if the directory has been created already. */
6392 	if (!tr->dir)
6393 		return;
6394 
6395 	/* Only create trace option files after update_tracer_options finish */
6396 	if (!tracer_options_updated)
6397 		return;
6398 
6399 	create_trace_option_files(tr, t);
6400 }
6401 
6402 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6403 {
6404 	struct tracer *t;
6405 #ifdef CONFIG_TRACER_MAX_TRACE
6406 	bool had_max_tr;
6407 #endif
6408 	int ret = 0;
6409 
6410 	mutex_lock(&trace_types_lock);
6411 
6412 	if (!ring_buffer_expanded) {
6413 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6414 						RING_BUFFER_ALL_CPUS);
6415 		if (ret < 0)
6416 			goto out;
6417 		ret = 0;
6418 	}
6419 
6420 	for (t = trace_types; t; t = t->next) {
6421 		if (strcmp(t->name, buf) == 0)
6422 			break;
6423 	}
6424 	if (!t) {
6425 		ret = -EINVAL;
6426 		goto out;
6427 	}
6428 	if (t == tr->current_trace)
6429 		goto out;
6430 
6431 #ifdef CONFIG_TRACER_SNAPSHOT
6432 	if (t->use_max_tr) {
6433 		local_irq_disable();
6434 		arch_spin_lock(&tr->max_lock);
6435 		if (tr->cond_snapshot)
6436 			ret = -EBUSY;
6437 		arch_spin_unlock(&tr->max_lock);
6438 		local_irq_enable();
6439 		if (ret)
6440 			goto out;
6441 	}
6442 #endif
6443 	/* Some tracers won't work on kernel command line */
6444 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6445 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6446 			t->name);
6447 		goto out;
6448 	}
6449 
6450 	/* Some tracers are only allowed for the top level buffer */
6451 	if (!trace_ok_for_array(t, tr)) {
6452 		ret = -EINVAL;
6453 		goto out;
6454 	}
6455 
6456 	/* If trace pipe files are being read, we can't change the tracer */
6457 	if (tr->trace_ref) {
6458 		ret = -EBUSY;
6459 		goto out;
6460 	}
6461 
6462 	trace_branch_disable();
6463 
6464 	tr->current_trace->enabled--;
6465 
6466 	if (tr->current_trace->reset)
6467 		tr->current_trace->reset(tr);
6468 
6469 #ifdef CONFIG_TRACER_MAX_TRACE
6470 	had_max_tr = tr->current_trace->use_max_tr;
6471 
6472 	/* Current trace needs to be nop_trace before synchronize_rcu */
6473 	tr->current_trace = &nop_trace;
6474 
6475 	if (had_max_tr && !t->use_max_tr) {
6476 		/*
6477 		 * We need to make sure that the update_max_tr sees that
6478 		 * current_trace changed to nop_trace to keep it from
6479 		 * swapping the buffers after we resize it.
6480 		 * The update_max_tr is called from interrupts disabled
6481 		 * update_max_tr() is called with interrupts disabled,
6482 		 * so a synchronize_rcu() is sufficient.
6483 		synchronize_rcu();
6484 		free_snapshot(tr);
6485 	}
6486 
6487 	if (t->use_max_tr && !tr->allocated_snapshot) {
6488 		ret = tracing_alloc_snapshot_instance(tr);
6489 		if (ret < 0)
6490 			goto out;
6491 	}
6492 #else
6493 	tr->current_trace = &nop_trace;
6494 #endif
6495 
6496 	if (t->init) {
6497 		ret = tracer_init(t, tr);
6498 		if (ret)
6499 			goto out;
6500 	}
6501 
6502 	tr->current_trace = t;
6503 	tr->current_trace->enabled++;
6504 	trace_branch_enable(tr);
6505  out:
6506 	mutex_unlock(&trace_types_lock);
6507 
6508 	return ret;
6509 }
6510 
6511 static ssize_t
6512 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6513 			size_t cnt, loff_t *ppos)
6514 {
6515 	struct trace_array *tr = filp->private_data;
6516 	char buf[MAX_TRACER_SIZE+1];
6517 	int i;
6518 	size_t ret;
6519 	int err;
6520 
6521 	ret = cnt;
6522 
6523 	if (cnt > MAX_TRACER_SIZE)
6524 		cnt = MAX_TRACER_SIZE;
6525 
6526 	if (copy_from_user(buf, ubuf, cnt))
6527 		return -EFAULT;
6528 
6529 	buf[cnt] = 0;
6530 
6531 	/* strip ending whitespace. */
6532 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6533 		buf[i] = 0;
6534 
6535 	err = tracing_set_tracer(tr, buf);
6536 	if (err)
6537 		return err;
6538 
6539 	*ppos += ret;
6540 
6541 	return ret;
6542 }
6543 
6544 static ssize_t
6545 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6546 		   size_t cnt, loff_t *ppos)
6547 {
6548 	char buf[64];
6549 	int r;
6550 
6551 	r = snprintf(buf, sizeof(buf), "%ld\n",
6552 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6553 	if (r > sizeof(buf))
6554 		r = sizeof(buf);
6555 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6556 }
6557 
6558 static ssize_t
6559 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6560 		    size_t cnt, loff_t *ppos)
6561 {
6562 	unsigned long val;
6563 	int ret;
6564 
6565 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6566 	if (ret)
6567 		return ret;
6568 
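	/* The value is written in microseconds but stored in nanoseconds. */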
6569 	*ptr = val * 1000;
6570 
6571 	return cnt;
6572 }
6573 
6574 static ssize_t
6575 tracing_thresh_read(struct file *filp, char __user *ubuf,
6576 		    size_t cnt, loff_t *ppos)
6577 {
6578 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6579 }
6580 
6581 static ssize_t
6582 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6583 		     size_t cnt, loff_t *ppos)
6584 {
6585 	struct trace_array *tr = filp->private_data;
6586 	int ret;
6587 
6588 	mutex_lock(&trace_types_lock);
6589 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6590 	if (ret < 0)
6591 		goto out;
6592 
6593 	if (tr->current_trace->update_thresh) {
6594 		ret = tr->current_trace->update_thresh(tr);
6595 		if (ret < 0)
6596 			goto out;
6597 	}
6598 
6599 	ret = cnt;
6600 out:
6601 	mutex_unlock(&trace_types_lock);
6602 
6603 	return ret;
6604 }
6605 
6606 #ifdef CONFIG_TRACER_MAX_TRACE
6607 
6608 static ssize_t
6609 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6610 		     size_t cnt, loff_t *ppos)
6611 {
6612 	struct trace_array *tr = filp->private_data;
6613 
6614 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6615 }
6616 
6617 static ssize_t
6618 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6619 		      size_t cnt, loff_t *ppos)
6620 {
6621 	struct trace_array *tr = filp->private_data;
6622 
6623 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6624 }
6625 
6626 #endif
6627 
6628 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6629 {
6630 	struct trace_array *tr = inode->i_private;
6631 	struct trace_iterator *iter;
6632 	int ret;
6633 
6634 	ret = tracing_check_open_get_tr(tr);
6635 	if (ret)
6636 		return ret;
6637 
6638 	mutex_lock(&trace_types_lock);
6639 
6640 	/* create a buffer to store the information to pass to userspace */
6641 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6642 	if (!iter) {
6643 		ret = -ENOMEM;
6644 		__trace_array_put(tr);
6645 		goto out;
6646 	}
6647 
6648 	trace_seq_init(&iter->seq);
6649 	iter->trace = tr->current_trace;
6650 
6651 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6652 		ret = -ENOMEM;
6653 		goto fail;
6654 	}
6655 
6656 	/* trace pipe does not show start of buffer */
6657 	cpumask_setall(iter->started);
6658 
6659 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6660 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6661 
6662 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6663 	if (trace_clocks[tr->clock_id].in_ns)
6664 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6665 
6666 	iter->tr = tr;
6667 	iter->array_buffer = &tr->array_buffer;
6668 	iter->cpu_file = tracing_get_cpu(inode);
6669 	mutex_init(&iter->mutex);
6670 	filp->private_data = iter;
6671 
6672 	if (iter->trace->pipe_open)
6673 		iter->trace->pipe_open(iter);
6674 
6675 	nonseekable_open(inode, filp);
6676 
6677 	tr->trace_ref++;
6678 out:
6679 	mutex_unlock(&trace_types_lock);
6680 	return ret;
6681 
6682 fail:
6683 	kfree(iter);
6684 	__trace_array_put(tr);
6685 	mutex_unlock(&trace_types_lock);
6686 	return ret;
6687 }
6688 
6689 static int tracing_release_pipe(struct inode *inode, struct file *file)
6690 {
6691 	struct trace_iterator *iter = file->private_data;
6692 	struct trace_array *tr = inode->i_private;
6693 
6694 	mutex_lock(&trace_types_lock);
6695 
6696 	tr->trace_ref--;
6697 
6698 	if (iter->trace->pipe_close)
6699 		iter->trace->pipe_close(iter);
6700 
6701 	mutex_unlock(&trace_types_lock);
6702 
6703 	free_cpumask_var(iter->started);
6704 	kfree(iter->fmt);
6705 	kfree(iter->temp);
6706 	mutex_destroy(&iter->mutex);
6707 	kfree(iter);
6708 
6709 	trace_array_put(tr);
6710 
6711 	return 0;
6712 }
6713 
6714 static __poll_t
6715 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6716 {
6717 	struct trace_array *tr = iter->tr;
6718 
6719 	/* Iterators are static, they should be filled or empty */
6720 	if (trace_buffer_iter(iter, iter->cpu_file))
6721 		return EPOLLIN | EPOLLRDNORM;
6722 
6723 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6724 		/*
6725 		 * Always select as readable when in blocking mode
6726 		 */
6727 		return EPOLLIN | EPOLLRDNORM;
6728 	else
6729 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6730 					     filp, poll_table, iter->tr->buffer_percent);
6731 }
6732 
6733 static __poll_t
6734 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6735 {
6736 	struct trace_iterator *iter = filp->private_data;
6737 
6738 	return trace_poll(iter, filp, poll_table);
6739 }
6740 
6741 /* Must be called with iter->mutex held. */
6742 static int tracing_wait_pipe(struct file *filp)
6743 {
6744 	struct trace_iterator *iter = filp->private_data;
6745 	int ret;
6746 
6747 	while (trace_empty(iter)) {
6748 
6749 		if ((filp->f_flags & O_NONBLOCK)) {
6750 			return -EAGAIN;
6751 		}
6752 
6753 		/*
6754 		 * We block until we read something and tracing is disabled.
6755 		 * We still block if tracing is disabled, but we have never
6756 		 * read anything. This allows a user to cat this file, and
6757 		 * then enable tracing. But after we have read something,
6758 		 * we give an EOF when tracing is again disabled.
6759 		 *
6760 		 * iter->pos will be 0 if we haven't read anything.
6761 		 */
6762 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6763 			break;
6764 
6765 		mutex_unlock(&iter->mutex);
6766 
6767 		ret = wait_on_pipe(iter, 0);
6768 
6769 		mutex_lock(&iter->mutex);
6770 
6771 		if (ret)
6772 			return ret;
6773 	}
6774 
6775 	return 1;
6776 }
6777 
6778 /*
6779  * Consumer reader.
6780  */
6781 static ssize_t
6782 tracing_read_pipe(struct file *filp, char __user *ubuf,
6783 		  size_t cnt, loff_t *ppos)
6784 {
6785 	struct trace_iterator *iter = filp->private_data;
6786 	ssize_t sret;
6787 
6788 	/*
6789 	 * Avoid more than one consumer on a single file descriptor
6790 	 * This is just a matter of trace coherency; the ring buffer itself
6791 	 * is protected.
6792 	 */
6793 	mutex_lock(&iter->mutex);
6794 
6795 	/* return any leftover data */
6796 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6797 	if (sret != -EBUSY)
6798 		goto out;
6799 
6800 	trace_seq_init(&iter->seq);
6801 
6802 	if (iter->trace->read) {
6803 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6804 		if (sret)
6805 			goto out;
6806 	}
6807 
6808 waitagain:
6809 	sret = tracing_wait_pipe(filp);
6810 	if (sret <= 0)
6811 		goto out;
6812 
6813 	/* stop when tracing is finished */
6814 	if (trace_empty(iter)) {
6815 		sret = 0;
6816 		goto out;
6817 	}
6818 
6819 	if (cnt >= PAGE_SIZE)
6820 		cnt = PAGE_SIZE - 1;
6821 
6822 	/* reset all but tr, trace, and overruns */
6823 	memset(&iter->seq, 0,
6824 	       sizeof(struct trace_iterator) -
6825 	       offsetof(struct trace_iterator, seq));
6826 	cpumask_clear(iter->started);
6827 	trace_seq_init(&iter->seq);
6828 	iter->pos = -1;
6829 
6830 	trace_event_read_lock();
6831 	trace_access_lock(iter->cpu_file);
6832 	while (trace_find_next_entry_inc(iter) != NULL) {
6833 		enum print_line_t ret;
6834 		int save_len = iter->seq.seq.len;
6835 
6836 		ret = print_trace_line(iter);
6837 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6838 			/*
6839 			 * If one print_trace_line() fills the entire trace_seq in one shot,
6840 			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6841 			 * In this case, we need to consume the event; otherwise the loop
6842 			 * will peek it again next time, resulting in an infinite loop.
6843 			 */
6844 			if (save_len == 0) {
6845 				iter->seq.full = 0;
6846 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6847 				trace_consume(iter);
6848 				break;
6849 			}
6850 
6851 			/* In other cases, don't print partial lines */
6852 			iter->seq.seq.len = save_len;
6853 			break;
6854 		}
6855 		if (ret != TRACE_TYPE_NO_CONSUME)
6856 			trace_consume(iter);
6857 
6858 		if (trace_seq_used(&iter->seq) >= cnt)
6859 			break;
6860 
6861 		/*
6862 		 * Setting the full flag means we reached the trace_seq buffer
6863 		 * size, and we should have exited via the partial-output condition above.
6864 		 * One of the trace_seq_* functions is not used properly.
6865 		 */
6866 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6867 			  iter->ent->type);
6868 	}
6869 	trace_access_unlock(iter->cpu_file);
6870 	trace_event_read_unlock();
6871 
6872 	/* Now copy what we have to the user */
6873 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6874 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6875 		trace_seq_init(&iter->seq);
6876 
6877 	/*
6878 	 * If there was nothing to send to user, in spite of consuming trace
6879 	 * entries, go back to wait for more entries.
6880 	 */
6881 	if (sret == -EBUSY)
6882 		goto waitagain;
6883 
6884 out:
6885 	mutex_unlock(&iter->mutex);
6886 
6887 	return sret;
6888 }
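
/*
 * Illustrative sketch (not part of this file, compiled out): how a user
 * space consumer might drive tracing_read_pipe() above by blocking on
 * trace_pipe.  The /sys/kernel/tracing path is the usual tracefs mount
 * point, not something mandated by this code.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);

	if (fd < 0)
		return 1;
	/* read() blocks until entries arrive, and consumes what it returns */
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}
#endif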
6889 
6890 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6891 				     unsigned int idx)
6892 {
6893 	__free_page(spd->pages[idx]);
6894 }
6895 
6896 static size_t
6897 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6898 {
6899 	size_t count;
6900 	int save_len;
6901 	int ret;
6902 
6903 	/* Seq buffer is page-sized, exactly what we need. */
6904 	for (;;) {
6905 		save_len = iter->seq.seq.len;
6906 		ret = print_trace_line(iter);
6907 
6908 		if (trace_seq_has_overflowed(&iter->seq)) {
6909 			iter->seq.seq.len = save_len;
6910 			break;
6911 		}
6912 
6913 		/*
6914 		 * This should not be hit, because it should only
6915 		 * be set if the iter->seq overflowed. But check it
6916 		 * anyway to be safe.
6917 		 */
6918 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6919 			iter->seq.seq.len = save_len;
6920 			break;
6921 		}
6922 
6923 		count = trace_seq_used(&iter->seq) - save_len;
6924 		if (rem < count) {
6925 			rem = 0;
6926 			iter->seq.seq.len = save_len;
6927 			break;
6928 		}
6929 
6930 		if (ret != TRACE_TYPE_NO_CONSUME)
6931 			trace_consume(iter);
6932 		rem -= count;
6933 		if (!trace_find_next_entry_inc(iter))	{
6934 			rem = 0;
6935 			iter->ent = NULL;
6936 			break;
6937 		}
6938 	}
6939 
6940 	return rem;
6941 }
6942 
6943 static ssize_t tracing_splice_read_pipe(struct file *filp,
6944 					loff_t *ppos,
6945 					struct pipe_inode_info *pipe,
6946 					size_t len,
6947 					unsigned int flags)
6948 {
6949 	struct page *pages_def[PIPE_DEF_BUFFERS];
6950 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6951 	struct trace_iterator *iter = filp->private_data;
6952 	struct splice_pipe_desc spd = {
6953 		.pages		= pages_def,
6954 		.partial	= partial_def,
6955 		.nr_pages	= 0, /* This gets updated below. */
6956 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6957 		.ops		= &default_pipe_buf_ops,
6958 		.spd_release	= tracing_spd_release_pipe,
6959 	};
6960 	ssize_t ret;
6961 	size_t rem;
6962 	unsigned int i;
6963 
6964 	if (splice_grow_spd(pipe, &spd))
6965 		return -ENOMEM;
6966 
6967 	mutex_lock(&iter->mutex);
6968 
6969 	if (iter->trace->splice_read) {
6970 		ret = iter->trace->splice_read(iter, filp,
6971 					       ppos, pipe, len, flags);
6972 		if (ret)
6973 			goto out_err;
6974 	}
6975 
6976 	ret = tracing_wait_pipe(filp);
6977 	if (ret <= 0)
6978 		goto out_err;
6979 
6980 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6981 		ret = -EFAULT;
6982 		goto out_err;
6983 	}
6984 
6985 	trace_event_read_lock();
6986 	trace_access_lock(iter->cpu_file);
6987 
6988 	/* Fill as many pages as possible. */
6989 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6990 		spd.pages[i] = alloc_page(GFP_KERNEL);
6991 		if (!spd.pages[i])
6992 			break;
6993 
6994 		rem = tracing_fill_pipe_page(rem, iter);
6995 
6996 		/* Copy the data into the page, so we can start over. */
6997 		ret = trace_seq_to_buffer(&iter->seq,
6998 					  page_address(spd.pages[i]),
6999 					  trace_seq_used(&iter->seq));
7000 		if (ret < 0) {
7001 			__free_page(spd.pages[i]);
7002 			break;
7003 		}
7004 		spd.partial[i].offset = 0;
7005 		spd.partial[i].len = trace_seq_used(&iter->seq);
7006 
7007 		trace_seq_init(&iter->seq);
7008 	}
7009 
7010 	trace_access_unlock(iter->cpu_file);
7011 	trace_event_read_unlock();
7012 	mutex_unlock(&iter->mutex);
7013 
7014 	spd.nr_pages = i;
7015 
7016 	if (i)
7017 		ret = splice_to_pipe(pipe, &spd);
7018 	else
7019 		ret = 0;
7020 out:
7021 	splice_shrink_spd(&spd);
7022 	return ret;
7023 
7024 out_err:
7025 	mutex_unlock(&iter->mutex);
7026 	goto out;
7027 }
7028 
7029 static ssize_t
7030 tracing_entries_read(struct file *filp, char __user *ubuf,
7031 		     size_t cnt, loff_t *ppos)
7032 {
7033 	struct inode *inode = file_inode(filp);
7034 	struct trace_array *tr = inode->i_private;
7035 	int cpu = tracing_get_cpu(inode);
7036 	char buf[64];
7037 	int r = 0;
7038 	ssize_t ret;
7039 
7040 	mutex_lock(&trace_types_lock);
7041 
7042 	if (cpu == RING_BUFFER_ALL_CPUS) {
7043 		int cpu, buf_size_same;
7044 		unsigned long size;
7045 
7046 		size = 0;
7047 		buf_size_same = 1;
7048 		/* check if all per-CPU buffer sizes are the same */
7049 		for_each_tracing_cpu(cpu) {
7050 			/* fill in the size from the first enabled CPU */
7051 			if (size == 0)
7052 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7053 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7054 				buf_size_same = 0;
7055 				break;
7056 			}
7057 		}
7058 
7059 		if (buf_size_same) {
7060 			if (!ring_buffer_expanded)
7061 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7062 					    size >> 10,
7063 					    trace_buf_size >> 10);
7064 			else
7065 				r = sprintf(buf, "%lu\n", size >> 10);
7066 		} else
7067 			r = sprintf(buf, "X\n");
7068 	} else
7069 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7070 
7071 	mutex_unlock(&trace_types_lock);
7072 
7073 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7074 	return ret;
7075 }
7076 
7077 static ssize_t
7078 tracing_entries_write(struct file *filp, const char __user *ubuf,
7079 		      size_t cnt, loff_t *ppos)
7080 {
7081 	struct inode *inode = file_inode(filp);
7082 	struct trace_array *tr = inode->i_private;
7083 	unsigned long val;
7084 	int ret;
7085 
7086 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7087 	if (ret)
7088 		return ret;
7089 
7090 	/* must have at least 1 entry */
7091 	if (!val)
7092 		return -EINVAL;
7093 
7094 	/* value is in KB */
7095 	val <<= 10;
7096 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7097 	if (ret < 0)
7098 		return ret;
7099 
7100 	*ppos += cnt;
7101 
7102 	return cnt;
7103 }
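
/*
 * Illustrative sketch (compiled out): resizing the ring buffer from user
 * space.  tracing_entries_write() above interprets the value as KiB per
 * CPU (val <<= 10), so writing "4096" requests 4 MiB per CPU.  The path
 * is the usual tracefs mount point, assumed for the example.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int set_buffer_size_kb(const char *kb)	/* e.g. "4096" */
{
	int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);

	if (fd < 0)
		return -1;
	write(fd, kb, strlen(kb));
	close(fd);
	return 0;
}
#endif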
7104 
7105 static ssize_t
7106 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7107 				size_t cnt, loff_t *ppos)
7108 {
7109 	struct trace_array *tr = filp->private_data;
7110 	char buf[64];
7111 	int r, cpu;
7112 	unsigned long size = 0, expanded_size = 0;
7113 
7114 	mutex_lock(&trace_types_lock);
7115 	for_each_tracing_cpu(cpu) {
7116 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7117 		if (!ring_buffer_expanded)
7118 			expanded_size += trace_buf_size >> 10;
7119 	}
7120 	if (ring_buffer_expanded)
7121 		r = sprintf(buf, "%lu\n", size);
7122 	else
7123 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7124 	mutex_unlock(&trace_types_lock);
7125 
7126 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7127 }
7128 
7129 static ssize_t
7130 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7131 			  size_t cnt, loff_t *ppos)
7132 {
7133 	/*
7134 	 * There is no need to read what the user has written; this function
7135 	 * exists only so that "echo" into this file does not return an error.
7136 	 */
7137 
7138 	*ppos += cnt;
7139 
7140 	return cnt;
7141 }
7142 
7143 static int
7144 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7145 {
7146 	struct trace_array *tr = inode->i_private;
7147 
7148 	/* disable tracing ? */
7149 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7150 		tracer_tracing_off(tr);
7151 	/* resize the ring buffer to 0 */
7152 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7153 
7154 	trace_array_put(tr);
7155 
7156 	return 0;
7157 }
7158 
7159 static ssize_t
7160 tracing_mark_write(struct file *filp, const char __user *ubuf,
7161 					size_t cnt, loff_t *fpos)
7162 {
7163 	struct trace_array *tr = filp->private_data;
7164 	struct ring_buffer_event *event;
7165 	enum event_trigger_type tt = ETT_NONE;
7166 	struct trace_buffer *buffer;
7167 	struct print_entry *entry;
7168 	ssize_t written;
7169 	int size;
7170 	int len;
7171 
7172 /* Used in tracing_mark_raw_write() as well */
7173 #define FAULTED_STR "<faulted>"
7174 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7175 
7176 	if (tracing_disabled)
7177 		return -EINVAL;
7178 
7179 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7180 		return -EINVAL;
7181 
7182 	if (cnt > TRACE_BUF_SIZE)
7183 		cnt = TRACE_BUF_SIZE;
7184 
7185 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7186 
7187 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7188 
7189 	/* If less than "<faulted>", then make sure we can still add that */
7190 	if (cnt < FAULTED_SIZE)
7191 		size += FAULTED_SIZE - cnt;
7192 
7193 	buffer = tr->array_buffer.buffer;
7194 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7195 					    tracing_gen_ctx());
7196 	if (unlikely(!event))
7197 		/* Ring buffer disabled, return as if not open for write */
7198 		return -EBADF;
7199 
7200 	entry = ring_buffer_event_data(event);
7201 	entry->ip = _THIS_IP_;
7202 
7203 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7204 	if (len) {
7205 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7206 		cnt = FAULTED_SIZE;
7207 		written = -EFAULT;
7208 	} else
7209 		written = cnt;
7210 
7211 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7212 		/* do not add \n before testing triggers, but add \0 */
7213 		entry->buf[cnt] = '\0';
7214 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7215 	}
7216 
7217 	if (entry->buf[cnt - 1] != '\n') {
7218 		entry->buf[cnt] = '\n';
7219 		entry->buf[cnt + 1] = '\0';
7220 	} else
7221 		entry->buf[cnt] = '\0';
7222 
7223 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7224 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7225 	__buffer_unlock_commit(buffer, event);
7226 
7227 	if (tt)
7228 		event_triggers_post_call(tr->trace_marker_file, tt);
7229 
7230 	return written;
7231 }
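
/*
 * Illustrative sketch (compiled out): emitting a marker from user space.
 * Each write() to trace_marker becomes one TRACE_PRINT event through
 * tracing_mark_write() above; a trailing newline is appended if missing.
 * The path assumes the usual tracefs mount point.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static void trace_marker_write(const char *msg)
{
	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);

	if (fd >= 0) {
		write(fd, msg, strlen(msg));
		close(fd);
	}
}
#endif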
7232 
7233 /* Limit it for now to 3K (including tag) */
7234 #define RAW_DATA_MAX_SIZE (1024*3)
7235 
7236 static ssize_t
7237 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7238 					size_t cnt, loff_t *fpos)
7239 {
7240 	struct trace_array *tr = filp->private_data;
7241 	struct ring_buffer_event *event;
7242 	struct trace_buffer *buffer;
7243 	struct raw_data_entry *entry;
7244 	ssize_t written;
7245 	int size;
7246 	int len;
7247 
7248 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7249 
7250 	if (tracing_disabled)
7251 		return -EINVAL;
7252 
7253 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7254 		return -EINVAL;
7255 
7256 	/* The marker must at least have a tag id */
7257 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7258 		return -EINVAL;
7259 
7260 	if (cnt > TRACE_BUF_SIZE)
7261 		cnt = TRACE_BUF_SIZE;
7262 
7263 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7264 
7265 	size = sizeof(*entry) + cnt;
7266 	if (cnt < FAULT_SIZE_ID)
7267 		size += FAULT_SIZE_ID - cnt;
7268 
7269 	buffer = tr->array_buffer.buffer;
7270 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7271 					    tracing_gen_ctx());
7272 	if (!event)
7273 		/* Ring buffer disabled, return as if not open for write */
7274 		return -EBADF;
7275 
7276 	entry = ring_buffer_event_data(event);
7277 
7278 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7279 	if (len) {
7280 		entry->id = -1;
7281 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7282 		written = -EFAULT;
7283 	} else
7284 		written = cnt;
7285 
7286 	__buffer_unlock_commit(buffer, event);
7287 
7288 	return written;
7289 }
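
/*
 * Illustrative sketch (compiled out): a raw marker write.  As checked in
 * tracing_mark_raw_write() above, the payload must start with an integer
 * tag id; whatever follows the id is opaque binary data.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static void trace_marker_raw_write(int id, const void *data, size_t len)
{
	char buf[256];
	int fd;

	if (len > sizeof(buf) - sizeof(id))
		return;
	fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
	if (fd < 0)
		return;
	memcpy(buf, &id, sizeof(id));
	memcpy(buf + sizeof(id), data, len);
	write(fd, buf, sizeof(id) + len);
	close(fd);
}
#endif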
7290 
7291 static int tracing_clock_show(struct seq_file *m, void *v)
7292 {
7293 	struct trace_array *tr = m->private;
7294 	int i;
7295 
7296 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7297 		seq_printf(m,
7298 			"%s%s%s%s", i ? " " : "",
7299 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7300 			i == tr->clock_id ? "]" : "");
7301 	seq_putc(m, '\n');
7302 
7303 	return 0;
7304 }
7305 
7306 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7307 {
7308 	int i;
7309 
7310 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7311 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7312 			break;
7313 	}
7314 	if (i == ARRAY_SIZE(trace_clocks))
7315 		return -EINVAL;
7316 
7317 	mutex_lock(&trace_types_lock);
7318 
7319 	tr->clock_id = i;
7320 
7321 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7322 
7323 	/*
7324 	 * New clock may not be consistent with the previous clock.
7325 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7326 	 */
7327 	tracing_reset_online_cpus(&tr->array_buffer);
7328 
7329 #ifdef CONFIG_TRACER_MAX_TRACE
7330 	if (tr->max_buffer.buffer)
7331 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7332 	tracing_reset_online_cpus(&tr->max_buffer);
7333 #endif
7334 
7335 	mutex_unlock(&trace_types_lock);
7336 
7337 	return 0;
7338 }
7339 
7340 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7341 				   size_t cnt, loff_t *fpos)
7342 {
7343 	struct seq_file *m = filp->private_data;
7344 	struct trace_array *tr = m->private;
7345 	char buf[64];
7346 	const char *clockstr;
7347 	int ret;
7348 
7349 	if (cnt >= sizeof(buf))
7350 		return -EINVAL;
7351 
7352 	if (copy_from_user(buf, ubuf, cnt))
7353 		return -EFAULT;
7354 
7355 	buf[cnt] = 0;
7356 
7357 	clockstr = strstrip(buf);
7358 
7359 	ret = tracing_set_clock(tr, clockstr);
7360 	if (ret)
7361 		return ret;
7362 
7363 	*fpos += cnt;
7364 
7365 	return cnt;
7366 }
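
/*
 * Illustrative sketch (compiled out): selecting a trace clock from user
 * space.  tracing_clock_write() above strips whitespace and calls
 * tracing_set_clock(), which also resets the buffers so timestamps stay
 * comparable.  The clock name must be one listed by reading trace_clock.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int set_trace_clock(const char *name)	/* e.g. "mono" or "global" */
{
	int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);

	if (fd < 0)
		return -1;
	write(fd, name, strlen(name));
	close(fd);
	return 0;
}
#endif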
7367 
7368 static int tracing_clock_open(struct inode *inode, struct file *file)
7369 {
7370 	struct trace_array *tr = inode->i_private;
7371 	int ret;
7372 
7373 	ret = tracing_check_open_get_tr(tr);
7374 	if (ret)
7375 		return ret;
7376 
7377 	ret = single_open(file, tracing_clock_show, inode->i_private);
7378 	if (ret < 0)
7379 		trace_array_put(tr);
7380 
7381 	return ret;
7382 }
7383 
7384 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7385 {
7386 	struct trace_array *tr = m->private;
7387 
7388 	mutex_lock(&trace_types_lock);
7389 
7390 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7391 		seq_puts(m, "delta [absolute]\n");
7392 	else
7393 		seq_puts(m, "[delta] absolute\n");
7394 
7395 	mutex_unlock(&trace_types_lock);
7396 
7397 	return 0;
7398 }
7399 
7400 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7401 {
7402 	struct trace_array *tr = inode->i_private;
7403 	int ret;
7404 
7405 	ret = tracing_check_open_get_tr(tr);
7406 	if (ret)
7407 		return ret;
7408 
7409 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7410 	if (ret < 0)
7411 		trace_array_put(tr);
7412 
7413 	return ret;
7414 }
7415 
7416 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7417 {
7418 	if (rbe == this_cpu_read(trace_buffered_event))
7419 		return ring_buffer_time_stamp(buffer);
7420 
7421 	return ring_buffer_event_time_stamp(buffer, rbe);
7422 }
7423 
7424 /*
7425  * Enable or disable using the per-CPU trace_buffered_event when possible.
7426  */
7427 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7428 {
7429 	int ret = 0;
7430 
7431 	mutex_lock(&trace_types_lock);
7432 
7433 	if (set && tr->no_filter_buffering_ref++)
7434 		goto out;
7435 
7436 	if (!set) {
7437 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7438 			ret = -EINVAL;
7439 			goto out;
7440 		}
7441 
7442 		--tr->no_filter_buffering_ref;
7443 	}
7444  out:
7445 	mutex_unlock(&trace_types_lock);
7446 
7447 	return ret;
7448 }
7449 
7450 struct ftrace_buffer_info {
7451 	struct trace_iterator	iter;
7452 	void			*spare;
7453 	unsigned int		spare_cpu;
7454 	unsigned int		read;
7455 };
7456 
7457 #ifdef CONFIG_TRACER_SNAPSHOT
7458 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7459 {
7460 	struct trace_array *tr = inode->i_private;
7461 	struct trace_iterator *iter;
7462 	struct seq_file *m;
7463 	int ret;
7464 
7465 	ret = tracing_check_open_get_tr(tr);
7466 	if (ret)
7467 		return ret;
7468 
7469 	if (file->f_mode & FMODE_READ) {
7470 		iter = __tracing_open(inode, file, true);
7471 		if (IS_ERR(iter))
7472 			ret = PTR_ERR(iter);
7473 	} else {
7474 		/* Writes still need the seq_file to hold the private data */
7475 		ret = -ENOMEM;
7476 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7477 		if (!m)
7478 			goto out;
7479 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7480 		if (!iter) {
7481 			kfree(m);
7482 			goto out;
7483 		}
7484 		ret = 0;
7485 
7486 		iter->tr = tr;
7487 		iter->array_buffer = &tr->max_buffer;
7488 		iter->cpu_file = tracing_get_cpu(inode);
7489 		m->private = iter;
7490 		file->private_data = m;
7491 	}
7492 out:
7493 	if (ret < 0)
7494 		trace_array_put(tr);
7495 
7496 	return ret;
7497 }
7498 
7499 static void tracing_swap_cpu_buffer(void *tr)
7500 {
7501 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7502 }
7503 
7504 static ssize_t
7505 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7506 		       loff_t *ppos)
7507 {
7508 	struct seq_file *m = filp->private_data;
7509 	struct trace_iterator *iter = m->private;
7510 	struct trace_array *tr = iter->tr;
7511 	unsigned long val;
7512 	int ret;
7513 
7514 	ret = tracing_update_buffers();
7515 	if (ret < 0)
7516 		return ret;
7517 
7518 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7519 	if (ret)
7520 		return ret;
7521 
7522 	mutex_lock(&trace_types_lock);
7523 
7524 	if (tr->current_trace->use_max_tr) {
7525 		ret = -EBUSY;
7526 		goto out;
7527 	}
7528 
7529 	local_irq_disable();
7530 	arch_spin_lock(&tr->max_lock);
7531 	if (tr->cond_snapshot)
7532 		ret = -EBUSY;
7533 	arch_spin_unlock(&tr->max_lock);
7534 	local_irq_enable();
7535 	if (ret)
7536 		goto out;
7537 
7538 	switch (val) {
7539 	case 0:
7540 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7541 			ret = -EINVAL;
7542 			break;
7543 		}
7544 		if (tr->allocated_snapshot)
7545 			free_snapshot(tr);
7546 		break;
7547 	case 1:
7548 /* Only allow per-cpu swap if the ring buffer supports it */
7549 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7550 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7551 			ret = -EINVAL;
7552 			break;
7553 		}
7554 #endif
7555 		if (tr->allocated_snapshot)
7556 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7557 					&tr->array_buffer, iter->cpu_file);
7558 		else
7559 			ret = tracing_alloc_snapshot_instance(tr);
7560 		if (ret < 0)
7561 			break;
7562 		/* Now, we're going to swap */
7563 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7564 			local_irq_disable();
7565 			update_max_tr(tr, current, smp_processor_id(), NULL);
7566 			local_irq_enable();
7567 		} else {
7568 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7569 						 (void *)tr, 1);
7570 		}
7571 		break;
7572 	default:
7573 		if (tr->allocated_snapshot) {
7574 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7575 				tracing_reset_online_cpus(&tr->max_buffer);
7576 			else
7577 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7578 		}
7579 		break;
7580 	}
7581 
7582 	if (ret >= 0) {
7583 		*ppos += cnt;
7584 		ret = cnt;
7585 	}
7586 out:
7587 	mutex_unlock(&trace_types_lock);
7588 	return ret;
7589 }
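
/*
 * Illustrative sketch (compiled out): driving the snapshot file.  Per the
 * switch in tracing_snapshot_write() above, writing "1" allocates the max
 * buffer if needed and swaps it with the live buffer, "0" frees it, and
 * any other number only clears the snapshot buffer.  Path assumes the
 * usual tracefs mount point.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

static void take_snapshot(void)
{
	int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);

	if (fd >= 0) {
		write(fd, "1", 1);	/* allocate (if needed) and swap */
		close(fd);
	}
}
#endif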
7590 
7591 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7592 {
7593 	struct seq_file *m = file->private_data;
7594 	int ret;
7595 
7596 	ret = tracing_release(inode, file);
7597 
7598 	if (file->f_mode & FMODE_READ)
7599 		return ret;
7600 
7601 	/* If write only, the seq_file is just a stub */
7602 	if (m)
7603 		kfree(m->private);
7604 	kfree(m);
7605 
7606 	return 0;
7607 }
7608 
7609 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7610 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7611 				    size_t count, loff_t *ppos);
7612 static int tracing_buffers_release(struct inode *inode, struct file *file);
7613 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7614 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7615 
7616 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7617 {
7618 	struct ftrace_buffer_info *info;
7619 	int ret;
7620 
7621 	/* The following checks for tracefs lockdown */
7622 	ret = tracing_buffers_open(inode, filp);
7623 	if (ret < 0)
7624 		return ret;
7625 
7626 	info = filp->private_data;
7627 
7628 	if (info->iter.trace->use_max_tr) {
7629 		tracing_buffers_release(inode, filp);
7630 		return -EBUSY;
7631 	}
7632 
7633 	info->iter.snapshot = true;
7634 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7635 
7636 	return ret;
7637 }
7638 
7639 #endif /* CONFIG_TRACER_SNAPSHOT */
7640 
7641 
7642 static const struct file_operations tracing_thresh_fops = {
7643 	.open		= tracing_open_generic,
7644 	.read		= tracing_thresh_read,
7645 	.write		= tracing_thresh_write,
7646 	.llseek		= generic_file_llseek,
7647 };
7648 
7649 #ifdef CONFIG_TRACER_MAX_TRACE
7650 static const struct file_operations tracing_max_lat_fops = {
7651 	.open		= tracing_open_generic_tr,
7652 	.read		= tracing_max_lat_read,
7653 	.write		= tracing_max_lat_write,
7654 	.llseek		= generic_file_llseek,
7655 	.release	= tracing_release_generic_tr,
7656 };
7657 #endif
7658 
7659 static const struct file_operations set_tracer_fops = {
7660 	.open		= tracing_open_generic_tr,
7661 	.read		= tracing_set_trace_read,
7662 	.write		= tracing_set_trace_write,
7663 	.llseek		= generic_file_llseek,
7664 	.release	= tracing_release_generic_tr,
7665 };
7666 
7667 static const struct file_operations tracing_pipe_fops = {
7668 	.open		= tracing_open_pipe,
7669 	.poll		= tracing_poll_pipe,
7670 	.read		= tracing_read_pipe,
7671 	.splice_read	= tracing_splice_read_pipe,
7672 	.release	= tracing_release_pipe,
7673 	.llseek		= no_llseek,
7674 };
7675 
7676 static const struct file_operations tracing_entries_fops = {
7677 	.open		= tracing_open_generic_tr,
7678 	.read		= tracing_entries_read,
7679 	.write		= tracing_entries_write,
7680 	.llseek		= generic_file_llseek,
7681 	.release	= tracing_release_generic_tr,
7682 };
7683 
7684 static const struct file_operations tracing_total_entries_fops = {
7685 	.open		= tracing_open_generic_tr,
7686 	.read		= tracing_total_entries_read,
7687 	.llseek		= generic_file_llseek,
7688 	.release	= tracing_release_generic_tr,
7689 };
7690 
7691 static const struct file_operations tracing_free_buffer_fops = {
7692 	.open		= tracing_open_generic_tr,
7693 	.write		= tracing_free_buffer_write,
7694 	.release	= tracing_free_buffer_release,
7695 };
7696 
7697 static const struct file_operations tracing_mark_fops = {
7698 	.open		= tracing_mark_open,
7699 	.write		= tracing_mark_write,
7700 	.release	= tracing_release_generic_tr,
7701 };
7702 
7703 static const struct file_operations tracing_mark_raw_fops = {
7704 	.open		= tracing_mark_open,
7705 	.write		= tracing_mark_raw_write,
7706 	.release	= tracing_release_generic_tr,
7707 };
7708 
7709 static const struct file_operations trace_clock_fops = {
7710 	.open		= tracing_clock_open,
7711 	.read		= seq_read,
7712 	.llseek		= seq_lseek,
7713 	.release	= tracing_single_release_tr,
7714 	.write		= tracing_clock_write,
7715 };
7716 
7717 static const struct file_operations trace_time_stamp_mode_fops = {
7718 	.open		= tracing_time_stamp_mode_open,
7719 	.read		= seq_read,
7720 	.llseek		= seq_lseek,
7721 	.release	= tracing_single_release_tr,
7722 };
7723 
7724 #ifdef CONFIG_TRACER_SNAPSHOT
7725 static const struct file_operations snapshot_fops = {
7726 	.open		= tracing_snapshot_open,
7727 	.read		= seq_read,
7728 	.write		= tracing_snapshot_write,
7729 	.llseek		= tracing_lseek,
7730 	.release	= tracing_snapshot_release,
7731 };
7732 
7733 static const struct file_operations snapshot_raw_fops = {
7734 	.open		= snapshot_raw_open,
7735 	.read		= tracing_buffers_read,
7736 	.release	= tracing_buffers_release,
7737 	.splice_read	= tracing_buffers_splice_read,
7738 	.llseek		= no_llseek,
7739 };
7740 
7741 #endif /* CONFIG_TRACER_SNAPSHOT */
7742 
7743 /*
7744  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7745  * @filp: The active open file structure
7746  * @ubuf: The user space provided buffer to read the value from
7747  * @cnt: The maximum number of bytes to read
7748  * @ppos: The current "file" position
7749  *
7750  * This function implements the write interface for a struct trace_min_max_param.
7751  * The filp->private_data must point to a trace_min_max_param structure that
7752  * defines where to write the value, the min and the max acceptable values,
7753  * and a lock to protect the write.
7754  */
7755 static ssize_t
7756 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7757 {
7758 	struct trace_min_max_param *param = filp->private_data;
7759 	u64 val;
7760 	int err;
7761 
7762 	if (!param)
7763 		return -EFAULT;
7764 
7765 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7766 	if (err)
7767 		return err;
7768 
7769 	if (param->lock)
7770 		mutex_lock(param->lock);
7771 
7772 	if (param->min && val < *param->min)
7773 		err = -EINVAL;
7774 
7775 	if (param->max && val > *param->max)
7776 		err = -EINVAL;
7777 
7778 	if (!err)
7779 		*param->val = val;
7780 
7781 	if (param->lock)
7782 		mutex_unlock(param->lock);
7783 
7784 	if (err)
7785 		return err;
7786 
7787 	return cnt;
7788 }
7789 
7790 /*
7791  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7792  * @filp: The active open file structure
7793  * @ubuf: The userspace provided buffer to read value into
7794  * @cnt: The maximum number of bytes to read
7795  * @ppos: The current "file" position
7796  *
7797  * This function implements the read interface for a struct trace_min_max_param.
7798  * The filp->private_data must point to a trace_min_max_param struct with valid
7799  * data.
7800  */
7801 static ssize_t
7802 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7803 {
7804 	struct trace_min_max_param *param = filp->private_data;
7805 	char buf[U64_STR_SIZE];
7806 	int len;
7807 	u64 val;
7808 
7809 	if (!param)
7810 		return -EFAULT;
7811 
7812 	val = *param->val;
7813 
7814 	if (cnt > sizeof(buf))
7815 		cnt = sizeof(buf);
7816 
7817 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7818 
7819 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7820 }
7821 
7822 const struct file_operations trace_min_max_fops = {
7823 	.open		= tracing_open_generic,
7824 	.read		= trace_min_max_read,
7825 	.write		= trace_min_max_write,
7826 };
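
/*
 * Illustrative sketch (compiled out): how a tracer might expose a bounded
 * u64 knob through trace_min_max_fops.  The file name and variables below
 * are hypothetical; only struct trace_min_max_param, trace_create_file()
 * and TRACE_MODE_WRITE come from the tracing code itself.
 */
#if 0
static u64 my_knob_val = 50;
static u64 my_knob_min = 1;
static u64 my_knob_max = 100;
static DEFINE_MUTEX(my_knob_lock);

static struct trace_min_max_param my_knob = {
	.lock	= &my_knob_lock,
	.val	= &my_knob_val,
	.min	= &my_knob_min,
	.max	= &my_knob_max,
};

static void my_knob_init(struct dentry *parent)
{
	/* Reads print the value; writes outside [1, 100] get -EINVAL */
	trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
			  &my_knob, &trace_min_max_fops);
}
#endif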
7827 
7828 #define TRACING_LOG_ERRS_MAX	8
7829 #define TRACING_LOG_LOC_MAX	128
7830 
7831 #define CMD_PREFIX "  Command: "
7832 
7833 struct err_info {
7834 	const char	**errs;	/* ptr to loc-specific array of err strings */
7835 	u8		type;	/* index into errs -> specific err string */
7836 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
7837 	u64		ts;
7838 };
7839 
7840 struct tracing_log_err {
7841 	struct list_head	list;
7842 	struct err_info		info;
7843 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7844 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7845 };
7846 
7847 static DEFINE_MUTEX(tracing_err_log_lock);
7848 
7849 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7850 {
7851 	struct tracing_log_err *err;
7852 
7853 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7854 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7855 		if (!err)
7856 			err = ERR_PTR(-ENOMEM);
7857 		else
7858 			tr->n_err_log_entries++;
7859 
7860 		return err;
7861 	}
7862 
7863 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7864 	list_del(&err->list);
7865 
7866 	return err;
7867 }
7868 
7869 /**
7870  * err_pos - find the position of a string within a command for error careting
7871  * @cmd: The tracing command that caused the error
7872  * @str: The string to position the caret at within @cmd
7873  *
7874  * Finds the position of the first occurrence of @str within @cmd.  The
7875  * return value can be passed to tracing_log_err() for caret placement
7876  * within @cmd.
7877  *
7878  * Returns the index within @cmd of the first occurrence of @str or 0
7879  * if @str was not found.
7880  */
7881 unsigned int err_pos(char *cmd, const char *str)
7882 {
7883 	char *found;
7884 
7885 	if (WARN_ON(!strlen(cmd)))
7886 		return 0;
7887 
7888 	found = strstr(cmd, str);
7889 	if (found)
7890 		return found - cmd;
7891 
7892 	return 0;
7893 }
7894 
7895 /**
7896  * tracing_log_err - write an error to the tracing error log
7897  * @tr: The associated trace array for the error (NULL for top level array)
7898  * @loc: A string describing where the error occurred
7899  * @cmd: The tracing command that caused the error
7900  * @errs: The array of loc-specific static error strings
7901  * @type: The index into errs[], which produces the specific static err string
7902  * @pos: The position the caret should be placed in the cmd
7903  *
7904  * Writes an error into tracing/error_log of the form:
7905  *
7906  * <loc>: error: <text>
7907  *   Command: <cmd>
7908  *              ^
7909  *
7910  * tracing/error_log is a small log file containing the last
7911  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7912  * unless there has been a tracing error, and the error log can be
7913  * cleared and have its memory freed by writing the empty string in
7914  * truncation mode to it i.e. echo > tracing/error_log.
7915  *
7916  * NOTE: the @errs array along with the @type param are used to
7917  * produce a static error string - this string is not copied and saved
7918  * when the error is logged - only a pointer to it is saved.  See
7919  * existing callers for examples of how static strings are typically
7920  * defined for use with tracing_log_err().
7921  */
7922 void tracing_log_err(struct trace_array *tr,
7923 		     const char *loc, const char *cmd,
7924 		     const char **errs, u8 type, u8 pos)
7925 {
7926 	struct tracing_log_err *err;
7927 
7928 	if (!tr)
7929 		tr = &global_trace;
7930 
7931 	mutex_lock(&tracing_err_log_lock);
7932 	err = get_tracing_log_err(tr);
7933 	if (PTR_ERR(err) == -ENOMEM) {
7934 		mutex_unlock(&tracing_err_log_lock);
7935 		return;
7936 	}
7937 
7938 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7939 	snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7940 
7941 	err->info.errs = errs;
7942 	err->info.type = type;
7943 	err->info.pos = pos;
7944 	err->info.ts = local_clock();
7945 
7946 	list_add_tail(&err->list, &tr->err_log);
7947 	mutex_unlock(&tracing_err_log_lock);
7948 }
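
/*
 * Illustrative sketch (compiled out): a hypothetical caller of
 * tracing_log_err().  The command name and error strings are made up;
 * real callers keep a static array of messages and use err_pos() to put
 * the caret under the offending token of the command.
 */
#if 0
static const char *my_cmd_errs[] = {
	"Field not found",
	"Invalid operator",
};

static void my_cmd_report_error(struct trace_array *tr, char *cmd,
				const char *bad_token, u8 type)
{
	tracing_log_err(tr, "my_cmd", cmd, my_cmd_errs,
			type, err_pos(cmd, bad_token));
}
#endif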
7949 
7950 static void clear_tracing_err_log(struct trace_array *tr)
7951 {
7952 	struct tracing_log_err *err, *next;
7953 
7954 	mutex_lock(&tracing_err_log_lock);
7955 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7956 		list_del(&err->list);
7957 		kfree(err);
7958 	}
7959 
7960 	tr->n_err_log_entries = 0;
7961 	mutex_unlock(&tracing_err_log_lock);
7962 }
7963 
7964 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7965 {
7966 	struct trace_array *tr = m->private;
7967 
7968 	mutex_lock(&tracing_err_log_lock);
7969 
7970 	return seq_list_start(&tr->err_log, *pos);
7971 }
7972 
7973 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7974 {
7975 	struct trace_array *tr = m->private;
7976 
7977 	return seq_list_next(v, &tr->err_log, pos);
7978 }
7979 
7980 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7981 {
7982 	mutex_unlock(&tracing_err_log_lock);
7983 }
7984 
7985 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7986 {
7987 	u8 i;
7988 
7989 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7990 		seq_putc(m, ' ');
7991 	for (i = 0; i < pos; i++)
7992 		seq_putc(m, ' ');
7993 	seq_puts(m, "^\n");
7994 }
7995 
7996 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7997 {
7998 	struct tracing_log_err *err = v;
7999 
8000 	if (err) {
8001 		const char *err_text = err->info.errs[err->info.type];
8002 		u64 sec = err->info.ts;
8003 		u32 nsec;
8004 
8005 		nsec = do_div(sec, NSEC_PER_SEC);
8006 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8007 			   err->loc, err_text);
8008 		seq_printf(m, "%s", err->cmd);
8009 		tracing_err_log_show_pos(m, err->info.pos);
8010 	}
8011 
8012 	return 0;
8013 }
8014 
8015 static const struct seq_operations tracing_err_log_seq_ops = {
8016 	.start  = tracing_err_log_seq_start,
8017 	.next   = tracing_err_log_seq_next,
8018 	.stop   = tracing_err_log_seq_stop,
8019 	.show   = tracing_err_log_seq_show
8020 };
8021 
8022 static int tracing_err_log_open(struct inode *inode, struct file *file)
8023 {
8024 	struct trace_array *tr = inode->i_private;
8025 	int ret = 0;
8026 
8027 	ret = tracing_check_open_get_tr(tr);
8028 	if (ret)
8029 		return ret;
8030 
8031 	/* If this file was opened for write, then erase contents */
8032 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8033 		clear_tracing_err_log(tr);
8034 
8035 	if (file->f_mode & FMODE_READ) {
8036 		ret = seq_open(file, &tracing_err_log_seq_ops);
8037 		if (!ret) {
8038 			struct seq_file *m = file->private_data;
8039 			m->private = tr;
8040 		} else {
8041 			trace_array_put(tr);
8042 		}
8043 	}
8044 	return ret;
8045 }
8046 
8047 static ssize_t tracing_err_log_write(struct file *file,
8048 				     const char __user *buffer,
8049 				     size_t count, loff_t *ppos)
8050 {
8051 	return count;
8052 }
8053 
8054 static int tracing_err_log_release(struct inode *inode, struct file *file)
8055 {
8056 	struct trace_array *tr = inode->i_private;
8057 
8058 	trace_array_put(tr);
8059 
8060 	if (file->f_mode & FMODE_READ)
8061 		seq_release(inode, file);
8062 
8063 	return 0;
8064 }
8065 
8066 static const struct file_operations tracing_err_log_fops = {
8067 	.open           = tracing_err_log_open,
8068 	.write		= tracing_err_log_write,
8069 	.read           = seq_read,
8070 	.llseek         = tracing_lseek,
8071 	.release        = tracing_err_log_release,
8072 };
8073 
8074 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8075 {
8076 	struct trace_array *tr = inode->i_private;
8077 	struct ftrace_buffer_info *info;
8078 	int ret;
8079 
8080 	ret = tracing_check_open_get_tr(tr);
8081 	if (ret)
8082 		return ret;
8083 
8084 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8085 	if (!info) {
8086 		trace_array_put(tr);
8087 		return -ENOMEM;
8088 	}
8089 
8090 	mutex_lock(&trace_types_lock);
8091 
8092 	info->iter.tr		= tr;
8093 	info->iter.cpu_file	= tracing_get_cpu(inode);
8094 	info->iter.trace	= tr->current_trace;
8095 	info->iter.array_buffer = &tr->array_buffer;
8096 	info->spare		= NULL;
8097 	/* Force reading ring buffer for first read */
8098 	info->read		= (unsigned int)-1;
8099 
8100 	filp->private_data = info;
8101 
8102 	tr->trace_ref++;
8103 
8104 	mutex_unlock(&trace_types_lock);
8105 
8106 	ret = nonseekable_open(inode, filp);
8107 	if (ret < 0)
8108 		trace_array_put(tr);
8109 
8110 	return ret;
8111 }
8112 
8113 static __poll_t
8114 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8115 {
8116 	struct ftrace_buffer_info *info = filp->private_data;
8117 	struct trace_iterator *iter = &info->iter;
8118 
8119 	return trace_poll(iter, filp, poll_table);
8120 }
8121 
8122 static ssize_t
8123 tracing_buffers_read(struct file *filp, char __user *ubuf,
8124 		     size_t count, loff_t *ppos)
8125 {
8126 	struct ftrace_buffer_info *info = filp->private_data;
8127 	struct trace_iterator *iter = &info->iter;
8128 	ssize_t ret = 0;
8129 	ssize_t size;
8130 
8131 	if (!count)
8132 		return 0;
8133 
8134 #ifdef CONFIG_TRACER_MAX_TRACE
8135 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8136 		return -EBUSY;
8137 #endif
8138 
8139 	if (!info->spare) {
8140 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8141 							  iter->cpu_file);
8142 		if (IS_ERR(info->spare)) {
8143 			ret = PTR_ERR(info->spare);
8144 			info->spare = NULL;
8145 		} else {
8146 			info->spare_cpu = iter->cpu_file;
8147 		}
8148 	}
8149 	if (!info->spare)
8150 		return ret;
8151 
8152 	/* Do we have previous read data to read? */
8153 	if (info->read < PAGE_SIZE)
8154 		goto read;
8155 
8156  again:
8157 	trace_access_lock(iter->cpu_file);
8158 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8159 				    &info->spare,
8160 				    count,
8161 				    iter->cpu_file, 0);
8162 	trace_access_unlock(iter->cpu_file);
8163 
8164 	if (ret < 0) {
8165 		if (trace_empty(iter)) {
8166 			if ((filp->f_flags & O_NONBLOCK))
8167 				return -EAGAIN;
8168 
8169 			ret = wait_on_pipe(iter, 0);
8170 			if (ret)
8171 				return ret;
8172 
8173 			goto again;
8174 		}
8175 		return 0;
8176 	}
8177 
8178 	info->read = 0;
8179  read:
8180 	size = PAGE_SIZE - info->read;
8181 	if (size > count)
8182 		size = count;
8183 
8184 	ret = copy_to_user(ubuf, info->spare + info->read, size);
8185 	if (ret == size)
8186 		return -EFAULT;
8187 
8188 	size -= ret;
8189 
8190 	*ppos += size;
8191 	info->read += size;
8192 
8193 	return size;
8194 }
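
/*
 * Illustrative sketch (compiled out): reading binary ring-buffer pages via
 * per_cpu/cpuN/trace_pipe_raw, which is backed by tracing_buffers_read()
 * above.  A 4 KiB page size and the usual tracefs mount point are assumed;
 * decoding the returned page format is out of scope here.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char page[4096];
	ssize_t n;
	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
		      O_RDONLY);

	if (fd < 0)
		return 1;
	while ((n = read(fd, page, sizeof(page))) > 0)
		fprintf(stderr, "read %zd raw bytes\n", n);
	close(fd);
	return 0;
}
#endif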
8195 
8196 static int tracing_buffers_release(struct inode *inode, struct file *file)
8197 {
8198 	struct ftrace_buffer_info *info = file->private_data;
8199 	struct trace_iterator *iter = &info->iter;
8200 
8201 	mutex_lock(&trace_types_lock);
8202 
8203 	iter->tr->trace_ref--;
8204 
8205 	__trace_array_put(iter->tr);
8206 
8207 	iter->wait_index++;
8208 	/* Make sure the waiters see the new wait_index */
8209 	smp_wmb();
8210 
8211 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8212 
8213 	if (info->spare)
8214 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8215 					   info->spare_cpu, info->spare);
8216 	kvfree(info);
8217 
8218 	mutex_unlock(&trace_types_lock);
8219 
8220 	return 0;
8221 }
8222 
8223 struct buffer_ref {
8224 	struct trace_buffer	*buffer;
8225 	void			*page;
8226 	int			cpu;
8227 	refcount_t		refcount;
8228 };
8229 
8230 static void buffer_ref_release(struct buffer_ref *ref)
8231 {
8232 	if (!refcount_dec_and_test(&ref->refcount))
8233 		return;
8234 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8235 	kfree(ref);
8236 }
8237 
8238 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8239 				    struct pipe_buffer *buf)
8240 {
8241 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8242 
8243 	buffer_ref_release(ref);
8244 	buf->private = 0;
8245 }
8246 
8247 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8248 				struct pipe_buffer *buf)
8249 {
8250 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8251 
8252 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8253 		return false;
8254 
8255 	refcount_inc(&ref->refcount);
8256 	return true;
8257 }
8258 
8259 /* Pipe buffer operations for a buffer. */
8260 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8261 	.release		= buffer_pipe_buf_release,
8262 	.get			= buffer_pipe_buf_get,
8263 };
8264 
8265 /*
8266  * Callback from splice_to_pipe(): release any pages left in the spd
8267  * in case we errored out while filling the pipe.
8268  */
8269 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8270 {
8271 	struct buffer_ref *ref =
8272 		(struct buffer_ref *)spd->partial[i].private;
8273 
8274 	buffer_ref_release(ref);
8275 	spd->partial[i].private = 0;
8276 }
8277 
8278 static ssize_t
8279 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8280 			    struct pipe_inode_info *pipe, size_t len,
8281 			    unsigned int flags)
8282 {
8283 	struct ftrace_buffer_info *info = file->private_data;
8284 	struct trace_iterator *iter = &info->iter;
8285 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8286 	struct page *pages_def[PIPE_DEF_BUFFERS];
8287 	struct splice_pipe_desc spd = {
8288 		.pages		= pages_def,
8289 		.partial	= partial_def,
8290 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8291 		.ops		= &buffer_pipe_buf_ops,
8292 		.spd_release	= buffer_spd_release,
8293 	};
8294 	struct buffer_ref *ref;
8295 	int entries, i;
8296 	ssize_t ret = 0;
8297 
8298 #ifdef CONFIG_TRACER_MAX_TRACE
8299 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8300 		return -EBUSY;
8301 #endif
8302 
8303 	if (*ppos & (PAGE_SIZE - 1))
8304 		return -EINVAL;
8305 
8306 	if (len & (PAGE_SIZE - 1)) {
8307 		if (len < PAGE_SIZE)
8308 			return -EINVAL;
8309 		len &= PAGE_MASK;
8310 	}
8311 
8312 	if (splice_grow_spd(pipe, &spd))
8313 		return -ENOMEM;
8314 
8315  again:
8316 	trace_access_lock(iter->cpu_file);
8317 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8318 
8319 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8320 		struct page *page;
8321 		int r;
8322 
8323 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8324 		if (!ref) {
8325 			ret = -ENOMEM;
8326 			break;
8327 		}
8328 
8329 		refcount_set(&ref->refcount, 1);
8330 		ref->buffer = iter->array_buffer->buffer;
8331 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8332 		if (IS_ERR(ref->page)) {
8333 			ret = PTR_ERR(ref->page);
8334 			ref->page = NULL;
8335 			kfree(ref);
8336 			break;
8337 		}
8338 		ref->cpu = iter->cpu_file;
8339 
8340 		r = ring_buffer_read_page(ref->buffer, &ref->page,
8341 					  len, iter->cpu_file, 1);
8342 		if (r < 0) {
8343 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8344 						   ref->page);
8345 			kfree(ref);
8346 			break;
8347 		}
8348 
8349 		page = virt_to_page(ref->page);
8350 
8351 		spd.pages[i] = page;
8352 		spd.partial[i].len = PAGE_SIZE;
8353 		spd.partial[i].offset = 0;
8354 		spd.partial[i].private = (unsigned long)ref;
8355 		spd.nr_pages++;
8356 		*ppos += PAGE_SIZE;
8357 
8358 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8359 	}
8360 
8361 	trace_access_unlock(iter->cpu_file);
8362 	spd.nr_pages = i;
8363 
8364 	/* did we read anything? */
8365 	if (!spd.nr_pages) {
8366 		long wait_index;
8367 
8368 		if (ret)
8369 			goto out;
8370 
8371 		ret = -EAGAIN;
8372 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8373 			goto out;
8374 
8375 		wait_index = READ_ONCE(iter->wait_index);
8376 
8377 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8378 		if (ret)
8379 			goto out;
8380 
8381 		/* No need to wait after waking up when tracing is off */
8382 		if (!tracer_tracing_is_on(iter->tr))
8383 			goto out;
8384 
8385 		/* Make sure we see the new wait_index */
8386 		smp_rmb();
8387 		if (wait_index != iter->wait_index)
8388 			goto out;
8389 
8390 		goto again;
8391 	}
8392 
8393 	ret = splice_to_pipe(pipe, &spd);
8394 out:
8395 	splice_shrink_spd(&spd);
8396 
8397 	return ret;
8398 }
8399 
8400 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8401 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8402 {
8403 	struct ftrace_buffer_info *info = file->private_data;
8404 	struct trace_iterator *iter = &info->iter;
8405 
8406 	if (cmd)
8407 		return -ENOIOCTLCMD;
8408 
8409 	mutex_lock(&trace_types_lock);
8410 
8411 	iter->wait_index++;
8412 	/* Make sure the waiters see the new wait_index */
8413 	smp_wmb();
8414 
8415 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8416 
8417 	mutex_unlock(&trace_types_lock);
8418 	return 0;
8419 }
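
/*
 * Illustrative sketch (compiled out): a second thread can unblock a reader
 * sleeping in tracing_buffers_read() or the splice path by issuing the
 * cmd-0 ioctl handled above on the same trace_pipe_raw file descriptor.
 */
#if 0
#include <sys/ioctl.h>

static void wake_trace_pipe_raw_waiters(int fd)
{
	/* cmd 0: wake up all waiters on this per-cpu buffer */
	ioctl(fd, 0);
}
#endif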
8420 
8421 static const struct file_operations tracing_buffers_fops = {
8422 	.open		= tracing_buffers_open,
8423 	.read		= tracing_buffers_read,
8424 	.poll		= tracing_buffers_poll,
8425 	.release	= tracing_buffers_release,
8426 	.splice_read	= tracing_buffers_splice_read,
8427 	.unlocked_ioctl = tracing_buffers_ioctl,
8428 	.llseek		= no_llseek,
8429 };
8430 
8431 static ssize_t
8432 tracing_stats_read(struct file *filp, char __user *ubuf,
8433 		   size_t count, loff_t *ppos)
8434 {
8435 	struct inode *inode = file_inode(filp);
8436 	struct trace_array *tr = inode->i_private;
8437 	struct array_buffer *trace_buf = &tr->array_buffer;
8438 	int cpu = tracing_get_cpu(inode);
8439 	struct trace_seq *s;
8440 	unsigned long cnt;
8441 	unsigned long long t;
8442 	unsigned long usec_rem;
8443 
8444 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8445 	if (!s)
8446 		return -ENOMEM;
8447 
8448 	trace_seq_init(s);
8449 
8450 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8451 	trace_seq_printf(s, "entries: %ld\n", cnt);
8452 
8453 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8454 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8455 
8456 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8457 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8458 
8459 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8460 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8461 
8462 	if (trace_clocks[tr->clock_id].in_ns) {
8463 		/* local or global for trace_clock */
8464 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8465 		usec_rem = do_div(t, USEC_PER_SEC);
8466 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8467 								t, usec_rem);
8468 
8469 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8470 		usec_rem = do_div(t, USEC_PER_SEC);
8471 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8472 	} else {
8473 		/* counter or tsc mode for trace_clock */
8474 		trace_seq_printf(s, "oldest event ts: %llu\n",
8475 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8476 
8477 		trace_seq_printf(s, "now ts: %llu\n",
8478 				ring_buffer_time_stamp(trace_buf->buffer));
8479 	}
8480 
8481 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8482 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8483 
8484 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8485 	trace_seq_printf(s, "read events: %ld\n", cnt);
8486 
8487 	count = simple_read_from_buffer(ubuf, count, ppos,
8488 					s->buffer, trace_seq_used(s));
8489 
8490 	kfree(s);
8491 
8492 	return count;
8493 }
8494 
8495 static const struct file_operations tracing_stats_fops = {
8496 	.open		= tracing_open_generic_tr,
8497 	.read		= tracing_stats_read,
8498 	.llseek		= generic_file_llseek,
8499 	.release	= tracing_release_generic_tr,
8500 };
8501 
8502 #ifdef CONFIG_DYNAMIC_FTRACE
8503 
8504 static ssize_t
8505 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8506 		  size_t cnt, loff_t *ppos)
8507 {
8508 	ssize_t ret;
8509 	char *buf;
8510 	int r;
8511 
8512 	/* 256 should be plenty to hold the amount needed */
8513 	buf = kmalloc(256, GFP_KERNEL);
8514 	if (!buf)
8515 		return -ENOMEM;
8516 
8517 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8518 		      ftrace_update_tot_cnt,
8519 		      ftrace_number_of_pages,
8520 		      ftrace_number_of_groups);
8521 
8522 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8523 	kfree(buf);
8524 	return ret;
8525 }
8526 
8527 static const struct file_operations tracing_dyn_info_fops = {
8528 	.open		= tracing_open_generic,
8529 	.read		= tracing_read_dyn_info,
8530 	.llseek		= generic_file_llseek,
8531 };
8532 #endif /* CONFIG_DYNAMIC_FTRACE */
8533 
8534 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8535 static void
8536 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8537 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8538 		void *data)
8539 {
8540 	tracing_snapshot_instance(tr);
8541 }
8542 
8543 static void
8544 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8545 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8546 		      void *data)
8547 {
8548 	struct ftrace_func_mapper *mapper = data;
8549 	long *count = NULL;
8550 
8551 	if (mapper)
8552 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8553 
8554 	if (count) {
8555 
8556 		if (*count <= 0)
8557 			return;
8558 
8559 		(*count)--;
8560 	}
8561 
8562 	tracing_snapshot_instance(tr);
8563 }
8564 
8565 static int
8566 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8567 		      struct ftrace_probe_ops *ops, void *data)
8568 {
8569 	struct ftrace_func_mapper *mapper = data;
8570 	long *count = NULL;
8571 
8572 	seq_printf(m, "%ps:", (void *)ip);
8573 
8574 	seq_puts(m, "snapshot");
8575 
8576 	if (mapper)
8577 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8578 
8579 	if (count)
8580 		seq_printf(m, ":count=%ld\n", *count);
8581 	else
8582 		seq_puts(m, ":unlimited\n");
8583 
8584 	return 0;
8585 }
8586 
8587 static int
8588 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8589 		     unsigned long ip, void *init_data, void **data)
8590 {
8591 	struct ftrace_func_mapper *mapper = *data;
8592 
8593 	if (!mapper) {
8594 		mapper = allocate_ftrace_func_mapper();
8595 		if (!mapper)
8596 			return -ENOMEM;
8597 		*data = mapper;
8598 	}
8599 
8600 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8601 }
8602 
8603 static void
8604 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8605 		     unsigned long ip, void *data)
8606 {
8607 	struct ftrace_func_mapper *mapper = data;
8608 
8609 	if (!ip) {
8610 		if (!mapper)
8611 			return;
8612 		free_ftrace_func_mapper(mapper, NULL);
8613 		return;
8614 	}
8615 
8616 	ftrace_func_mapper_remove_ip(mapper, ip);
8617 }
8618 
8619 static struct ftrace_probe_ops snapshot_probe_ops = {
8620 	.func			= ftrace_snapshot,
8621 	.print			= ftrace_snapshot_print,
8622 };
8623 
8624 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8625 	.func			= ftrace_count_snapshot,
8626 	.print			= ftrace_snapshot_print,
8627 	.init			= ftrace_snapshot_init,
8628 	.free			= ftrace_snapshot_free,
8629 };
8630 
8631 static int
8632 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8633 			       char *glob, char *cmd, char *param, int enable)
8634 {
8635 	struct ftrace_probe_ops *ops;
8636 	void *count = (void *)-1;
8637 	char *number;
8638 	int ret;
8639 
8640 	if (!tr)
8641 		return -ENODEV;
8642 
8643 	/* hash funcs only work with set_ftrace_filter */
8644 	if (!enable)
8645 		return -EINVAL;
8646 
8647 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8648 
8649 	if (glob[0] == '!')
8650 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8651 
8652 	if (!param)
8653 		goto out_reg;
8654 
8655 	number = strsep(&param, ":");
8656 
8657 	if (!strlen(number))
8658 		goto out_reg;
8659 
8660 	/*
8661 	 * We use the callback data field (which is a pointer)
8662 	 * as our counter.
8663 	 */
8664 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8665 	if (ret)
8666 		return ret;
8667 
8668  out_reg:
8669 	ret = tracing_alloc_snapshot_instance(tr);
8670 	if (ret < 0)
8671 		goto out;
8672 
8673 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8674 
8675  out:
8676 	return ret < 0 ? ret : 0;
8677 }
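
/*
 * Illustrative sketch (compiled out): using the "snapshot" command that
 * ftrace_trace_snapshot_callback() implements.  Writing
 * "<function>:snapshot[:count]" to set_ftrace_filter arms a probe that
 * takes a snapshot when the function is hit; the function name below is
 * only an example and the path assumes the usual tracefs mount point.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static void arm_snapshot_probe(const char *spec)	/* e.g. "kfree:snapshot:1" */
{
	int fd = open("/sys/kernel/tracing/set_ftrace_filter", O_WRONLY);

	if (fd >= 0) {
		write(fd, spec, strlen(spec));
		close(fd);
	}
}
#endif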
8678 
8679 static struct ftrace_func_command ftrace_snapshot_cmd = {
8680 	.name			= "snapshot",
8681 	.func			= ftrace_trace_snapshot_callback,
8682 };
8683 
8684 static __init int register_snapshot_cmd(void)
8685 {
8686 	return register_ftrace_command(&ftrace_snapshot_cmd);
8687 }
8688 #else
8689 static inline __init int register_snapshot_cmd(void) { return 0; }
8690 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8691 
8692 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8693 {
8694 	if (WARN_ON(!tr->dir))
8695 		return ERR_PTR(-ENODEV);
8696 
8697 	/* Top directory uses NULL as the parent */
8698 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8699 		return NULL;
8700 
8701 	/* All sub buffers have a descriptor */
8702 	return tr->dir;
8703 }
8704 
8705 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8706 {
8707 	struct dentry *d_tracer;
8708 
8709 	if (tr->percpu_dir)
8710 		return tr->percpu_dir;
8711 
8712 	d_tracer = tracing_get_dentry(tr);
8713 	if (IS_ERR(d_tracer))
8714 		return NULL;
8715 
8716 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8717 
8718 	MEM_FAIL(!tr->percpu_dir,
8719 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8720 
8721 	return tr->percpu_dir;
8722 }
8723 
8724 static struct dentry *
8725 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8726 		      void *data, long cpu, const struct file_operations *fops)
8727 {
8728 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8729 
8730 	if (ret) /* See tracing_get_cpu() */
8731 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8732 	return ret;
8733 }
8734 
8735 static void
8736 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8737 {
8738 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8739 	struct dentry *d_cpu;
8740 	char cpu_dir[30]; /* 30 characters should be more than enough */
8741 
8742 	if (!d_percpu)
8743 		return;
8744 
8745 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8746 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8747 	if (!d_cpu) {
8748 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8749 		return;
8750 	}
8751 
8752 	/* per cpu trace_pipe */
8753 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8754 				tr, cpu, &tracing_pipe_fops);
8755 
8756 	/* per cpu trace */
8757 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8758 				tr, cpu, &tracing_fops);
8759 
8760 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8761 				tr, cpu, &tracing_buffers_fops);
8762 
8763 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8764 				tr, cpu, &tracing_stats_fops);
8765 
8766 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8767 				tr, cpu, &tracing_entries_fops);
8768 
8769 #ifdef CONFIG_TRACER_SNAPSHOT
8770 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8771 				tr, cpu, &snapshot_fops);
8772 
8773 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8774 				tr, cpu, &snapshot_raw_fops);
8775 #endif
8776 }
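
/*
 * Illustrative note (not from the original file): after the function above
 * has run for each tracing CPU, an instance ends up with a per-CPU layout
 * along the lines of
 *
 *   <tracefs>/per_cpu/cpu0/{trace,trace_pipe,trace_pipe_raw,stats,buffer_size_kb}
 *   <tracefs>/per_cpu/cpu1/...
 *
 * plus per-CPU snapshot and snapshot_raw files when CONFIG_TRACER_SNAPSHOT
 * is enabled.
 */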
8777 
8778 #ifdef CONFIG_FTRACE_SELFTEST
8779 /* Let selftest have access to static functions in this file */
8780 #include "trace_selftest.c"
8781 #endif
8782 
8783 static ssize_t
8784 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8785 			loff_t *ppos)
8786 {
8787 	struct trace_option_dentry *topt = filp->private_data;
8788 	char *buf;
8789 
8790 	if (topt->flags->val & topt->opt->bit)
8791 		buf = "1\n";
8792 	else
8793 		buf = "0\n";
8794 
8795 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8796 }
8797 
8798 static ssize_t
8799 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8800 			 loff_t *ppos)
8801 {
8802 	struct trace_option_dentry *topt = filp->private_data;
8803 	unsigned long val;
8804 	int ret;
8805 
8806 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8807 	if (ret)
8808 		return ret;
8809 
8810 	if (val != 0 && val != 1)
8811 		return -EINVAL;
8812 
8813 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8814 		mutex_lock(&trace_types_lock);
8815 		ret = __set_tracer_option(topt->tr, topt->flags,
8816 					  topt->opt, !val);
8817 		mutex_unlock(&trace_types_lock);
8818 		if (ret)
8819 			return ret;
8820 	}
8821 
8822 	*ppos += cnt;
8823 
8824 	return cnt;
8825 }
8826 
8827 static int tracing_open_options(struct inode *inode, struct file *filp)
8828 {
8829 	struct trace_option_dentry *topt = inode->i_private;
8830 	int ret;
8831 
8832 	ret = tracing_check_open_get_tr(topt->tr);
8833 	if (ret)
8834 		return ret;
8835 
8836 	filp->private_data = inode->i_private;
8837 	return 0;
8838 }
8839 
8840 static int tracing_release_options(struct inode *inode, struct file *file)
8841 {
8842 	struct trace_option_dentry *topt = file->private_data;
8843 
8844 	trace_array_put(topt->tr);
8845 	return 0;
8846 }
8847 
8848 static const struct file_operations trace_options_fops = {
8849 	.open = tracing_open_options,
8850 	.read = trace_options_read,
8851 	.write = trace_options_write,
8852 	.llseek	= generic_file_llseek,
8853 	.release = tracing_release_options,
8854 };
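
/*
 * Usage sketch (not from the original file; the option name assumes the
 * function_graph tracer is compiled in): these fops back the per-tracer
 * files under <tracefs>/options/, e.g.
 *
 *   echo function_graph > current_tracer
 *   cat options/funcgraph-duration      # -> "1"
 *   echo 0 > options/funcgraph-duration # clears the flag via
 *                                       # trace_options_write()
 */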
8855 
8856 /*
8857  * In order to pass in both the trace_array descriptor as well as the index
8858  * to the flag that the trace option file represents, the trace_array
8859  * has a character array of trace_flags_index[], which holds the index
8860  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8861  * The address of this character array is passed to the flag option file
8862  * read/write callbacks.
8863  *
8864  * In order to extract both the index and the trace_array descriptor,
8865  * get_tr_index() uses the following algorithm.
8866  *
8867  *   idx = *ptr;
8868  *
8869  * As the pointer points into that array and the value stored there is
8870  * its own index (remember index[1] == 1).
8871  *
8872  * Then, to get the trace_array descriptor, subtracting that index
8873  * from the pointer takes us back to the start of the array.
8874  *
8875  *   ptr - idx == &index[0]
8876  *
8877  * Then a simple container_of() from that pointer gets us to the
8878  * trace_array descriptor.
8879  */
8880 static void get_tr_index(void *data, struct trace_array **ptr,
8881 			 unsigned int *pindex)
8882 {
8883 	*pindex = *(unsigned char *)data;
8884 
8885 	*ptr = container_of(data - *pindex, struct trace_array,
8886 			    trace_flags_index);
8887 }
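
/*
 * A worked example (not from the original file) of the arithmetic described
 * above: if the option file for flag 5 was created with
 * data == &tr->trace_flags_index[5], then
 *
 *   idx = *(unsigned char *)data;   // == 5, because index[5] == 5
 *   data - idx                      // == &tr->trace_flags_index[0]
 *   container_of(data - idx, struct trace_array, trace_flags_index) == tr
 */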
8888 
8889 static ssize_t
8890 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8891 			loff_t *ppos)
8892 {
8893 	void *tr_index = filp->private_data;
8894 	struct trace_array *tr;
8895 	unsigned int index;
8896 	char *buf;
8897 
8898 	get_tr_index(tr_index, &tr, &index);
8899 
8900 	if (tr->trace_flags & (1 << index))
8901 		buf = "1\n";
8902 	else
8903 		buf = "0\n";
8904 
8905 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8906 }
8907 
8908 static ssize_t
8909 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8910 			 loff_t *ppos)
8911 {
8912 	void *tr_index = filp->private_data;
8913 	struct trace_array *tr;
8914 	unsigned int index;
8915 	unsigned long val;
8916 	int ret;
8917 
8918 	get_tr_index(tr_index, &tr, &index);
8919 
8920 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8921 	if (ret)
8922 		return ret;
8923 
8924 	if (val != 0 && val != 1)
8925 		return -EINVAL;
8926 
8927 	mutex_lock(&event_mutex);
8928 	mutex_lock(&trace_types_lock);
8929 	ret = set_tracer_flag(tr, 1 << index, val);
8930 	mutex_unlock(&trace_types_lock);
8931 	mutex_unlock(&event_mutex);
8932 
8933 	if (ret < 0)
8934 		return ret;
8935 
8936 	*ppos += cnt;
8937 
8938 	return cnt;
8939 }
8940 
8941 static const struct file_operations trace_options_core_fops = {
8942 	.open = tracing_open_generic,
8943 	.read = trace_options_core_read,
8944 	.write = trace_options_core_write,
8945 	.llseek = generic_file_llseek,
8946 };
8947 
8948 struct dentry *trace_create_file(const char *name,
8949 				 umode_t mode,
8950 				 struct dentry *parent,
8951 				 void *data,
8952 				 const struct file_operations *fops)
8953 {
8954 	struct dentry *ret;
8955 
8956 	ret = tracefs_create_file(name, mode, parent, data, fops);
8957 	if (!ret)
8958 		pr_warn("Could not create tracefs '%s' entry\n", name);
8959 
8960 	return ret;
8961 }
8962 
8963 
8964 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8965 {
8966 	struct dentry *d_tracer;
8967 
8968 	if (tr->options)
8969 		return tr->options;
8970 
8971 	d_tracer = tracing_get_dentry(tr);
8972 	if (IS_ERR(d_tracer))
8973 		return NULL;
8974 
8975 	tr->options = tracefs_create_dir("options", d_tracer);
8976 	if (!tr->options) {
8977 		pr_warn("Could not create tracefs directory 'options'\n");
8978 		return NULL;
8979 	}
8980 
8981 	return tr->options;
8982 }
8983 
8984 static void
8985 create_trace_option_file(struct trace_array *tr,
8986 			 struct trace_option_dentry *topt,
8987 			 struct tracer_flags *flags,
8988 			 struct tracer_opt *opt)
8989 {
8990 	struct dentry *t_options;
8991 
8992 	t_options = trace_options_init_dentry(tr);
8993 	if (!t_options)
8994 		return;
8995 
8996 	topt->flags = flags;
8997 	topt->opt = opt;
8998 	topt->tr = tr;
8999 
9000 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9001 					t_options, topt, &trace_options_fops);
9002 
9003 }
9004 
9005 static void
9006 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9007 {
9008 	struct trace_option_dentry *topts;
9009 	struct trace_options *tr_topts;
9010 	struct tracer_flags *flags;
9011 	struct tracer_opt *opts;
9012 	int cnt;
9013 	int i;
9014 
9015 	if (!tracer)
9016 		return;
9017 
9018 	flags = tracer->flags;
9019 
9020 	if (!flags || !flags->opts)
9021 		return;
9022 
9023 	/*
9024 	 * If this is an instance, only create flags for tracers
9025 	 * the instance may have.
9026 	 */
9027 	if (!trace_ok_for_array(tracer, tr))
9028 		return;
9029 
9030 	for (i = 0; i < tr->nr_topts; i++) {
9031 		/* Make sure there's no duplicate flags. */
9032 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9033 			return;
9034 	}
9035 
9036 	opts = flags->opts;
9037 
9038 	for (cnt = 0; opts[cnt].name; cnt++)
9039 		;
9040 
9041 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9042 	if (!topts)
9043 		return;
9044 
9045 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9046 			    GFP_KERNEL);
9047 	if (!tr_topts) {
9048 		kfree(topts);
9049 		return;
9050 	}
9051 
9052 	tr->topts = tr_topts;
9053 	tr->topts[tr->nr_topts].tracer = tracer;
9054 	tr->topts[tr->nr_topts].topts = topts;
9055 	tr->nr_topts++;
9056 
9057 	for (cnt = 0; opts[cnt].name; cnt++) {
9058 		create_trace_option_file(tr, &topts[cnt], flags,
9059 					 &opts[cnt]);
9060 		MEM_FAIL(topts[cnt].entry == NULL,
9061 			  "Failed to create trace option: %s",
9062 			  opts[cnt].name);
9063 	}
9064 }
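
/*
 * Illustrative note (not from the original file): this is what makes
 * tracer-specific flags show up as files under <tracefs>/options/ once the
 * tracer is registered, e.g. options/func_stack_trace for the function
 * tracer or options/display-graph for the latency tracers.
 */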
9065 
9066 static struct dentry *
9067 create_trace_option_core_file(struct trace_array *tr,
9068 			      const char *option, long index)
9069 {
9070 	struct dentry *t_options;
9071 
9072 	t_options = trace_options_init_dentry(tr);
9073 	if (!t_options)
9074 		return NULL;
9075 
9076 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9077 				 (void *)&tr->trace_flags_index[index],
9078 				 &trace_options_core_fops);
9079 }
9080 
9081 static void create_trace_options_dir(struct trace_array *tr)
9082 {
9083 	struct dentry *t_options;
9084 	bool top_level = tr == &global_trace;
9085 	int i;
9086 
9087 	t_options = trace_options_init_dentry(tr);
9088 	if (!t_options)
9089 		return;
9090 
9091 	for (i = 0; trace_options[i]; i++) {
9092 		if (top_level ||
9093 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9094 			create_trace_option_core_file(tr, trace_options[i], i);
9095 	}
9096 }
9097 
9098 static ssize_t
9099 rb_simple_read(struct file *filp, char __user *ubuf,
9100 	       size_t cnt, loff_t *ppos)
9101 {
9102 	struct trace_array *tr = filp->private_data;
9103 	char buf[64];
9104 	int r;
9105 
9106 	r = tracer_tracing_is_on(tr);
9107 	r = sprintf(buf, "%d\n", r);
9108 
9109 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9110 }
9111 
9112 static ssize_t
9113 rb_simple_write(struct file *filp, const char __user *ubuf,
9114 		size_t cnt, loff_t *ppos)
9115 {
9116 	struct trace_array *tr = filp->private_data;
9117 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9118 	unsigned long val;
9119 	int ret;
9120 
9121 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9122 	if (ret)
9123 		return ret;
9124 
9125 	if (buffer) {
9126 		mutex_lock(&trace_types_lock);
9127 		if (!!val == tracer_tracing_is_on(tr)) {
9128 			val = 0; /* do nothing */
9129 		} else if (val) {
9130 			tracer_tracing_on(tr);
9131 			if (tr->current_trace->start)
9132 				tr->current_trace->start(tr);
9133 		} else {
9134 			tracer_tracing_off(tr);
9135 			if (tr->current_trace->stop)
9136 				tr->current_trace->stop(tr);
9137 			/* Wake up any waiters */
9138 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9139 		}
9140 		mutex_unlock(&trace_types_lock);
9141 	}
9142 
9143 	(*ppos)++;
9144 
9145 	return cnt;
9146 }
9147 
9148 static const struct file_operations rb_simple_fops = {
9149 	.open		= tracing_open_generic_tr,
9150 	.read		= rb_simple_read,
9151 	.write		= rb_simple_write,
9152 	.release	= tracing_release_generic_tr,
9153 	.llseek		= default_llseek,
9154 };
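
/*
 * Usage sketch (not from the original file): these fops back the
 * <tracefs>/tracing_on file, e.g.
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on   # stop writing to the buffer
 *   cat /sys/kernel/tracing/tracing_on        # -> "0"
 *   echo 1 > /sys/kernel/tracing/tracing_on   # resume
 *
 * Only writes to the ring buffer are gated; the current tracer stays
 * registered and its start()/stop() hooks run as shown in rb_simple_write().
 */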
9155 
9156 static ssize_t
9157 buffer_percent_read(struct file *filp, char __user *ubuf,
9158 		    size_t cnt, loff_t *ppos)
9159 {
9160 	struct trace_array *tr = filp->private_data;
9161 	char buf[64];
9162 	int r;
9163 
9164 	r = tr->buffer_percent;
9165 	r = sprintf(buf, "%d\n", r);
9166 
9167 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9168 }
9169 
9170 static ssize_t
9171 buffer_percent_write(struct file *filp, const char __user *ubuf,
9172 		     size_t cnt, loff_t *ppos)
9173 {
9174 	struct trace_array *tr = filp->private_data;
9175 	unsigned long val;
9176 	int ret;
9177 
9178 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9179 	if (ret)
9180 		return ret;
9181 
9182 	if (val > 100)
9183 		return -EINVAL;
9184 
9185 	tr->buffer_percent = val;
9186 
9187 	(*ppos)++;
9188 
9189 	return cnt;
9190 }
9191 
9192 static const struct file_operations buffer_percent_fops = {
9193 	.open		= tracing_open_generic_tr,
9194 	.read		= buffer_percent_read,
9195 	.write		= buffer_percent_write,
9196 	.release	= tracing_release_generic_tr,
9197 	.llseek		= default_llseek,
9198 };
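
/*
 * Illustrative note (not from the original file): buffer_percent is the
 * watermark for waking up readers blocked on trace_pipe{,_raw}: 0 wakes
 * them as soon as any data is available, 100 only once the buffer is full.
 * For example:
 *
 *   echo 0   > /sys/kernel/tracing/buffer_percent   # wake immediately
 *   echo 100 > /sys/kernel/tracing/buffer_percent   # wake only when full
 *
 * The default of 50 is set in init_tracer_tracefs() below.
 */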
9199 
9200 static struct dentry *trace_instance_dir;
9201 
9202 static void
9203 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9204 
9205 static int
9206 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9207 {
9208 	enum ring_buffer_flags rb_flags;
9209 
9210 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9211 
9212 	buf->tr = tr;
9213 
9214 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9215 	if (!buf->buffer)
9216 		return -ENOMEM;
9217 
9218 	buf->data = alloc_percpu(struct trace_array_cpu);
9219 	if (!buf->data) {
9220 		ring_buffer_free(buf->buffer);
9221 		buf->buffer = NULL;
9222 		return -ENOMEM;
9223 	}
9224 
9225 	/* Allocate the first page for all buffers */
9226 	set_buffer_entries(&tr->array_buffer,
9227 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9228 
9229 	return 0;
9230 }
9231 
9232 static int allocate_trace_buffers(struct trace_array *tr, int size)
9233 {
9234 	int ret;
9235 
9236 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9237 	if (ret)
9238 		return ret;
9239 
9240 #ifdef CONFIG_TRACER_MAX_TRACE
9241 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9242 				    allocate_snapshot ? size : 1);
9243 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9244 		ring_buffer_free(tr->array_buffer.buffer);
9245 		tr->array_buffer.buffer = NULL;
9246 		free_percpu(tr->array_buffer.data);
9247 		tr->array_buffer.data = NULL;
9248 		return -ENOMEM;
9249 	}
9250 	tr->allocated_snapshot = allocate_snapshot;
9251 
9252 	/*
9253 	 * Only the top level trace array gets its snapshot allocated
9254 	 * from the kernel command line.
9255 	 */
9256 	allocate_snapshot = false;
9257 #endif
9258 
9259 	return 0;
9260 }
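
/*
 * Illustrative note (not from the original file): unless "alloc_snapshot"
 * was given on the kernel command line, the max/snapshot buffer above is
 * allocated with a token size of 1 and is only resized to match the main
 * buffer when a snapshot is first requested (see
 * tracing_alloc_snapshot_instance()).
 */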
9261 
9262 static void free_trace_buffer(struct array_buffer *buf)
9263 {
9264 	if (buf->buffer) {
9265 		ring_buffer_free(buf->buffer);
9266 		buf->buffer = NULL;
9267 		free_percpu(buf->data);
9268 		buf->data = NULL;
9269 	}
9270 }
9271 
9272 static void free_trace_buffers(struct trace_array *tr)
9273 {
9274 	if (!tr)
9275 		return;
9276 
9277 	free_trace_buffer(&tr->array_buffer);
9278 
9279 #ifdef CONFIG_TRACER_MAX_TRACE
9280 	free_trace_buffer(&tr->max_buffer);
9281 #endif
9282 }
9283 
9284 static void init_trace_flags_index(struct trace_array *tr)
9285 {
9286 	int i;
9287 
9288 	/* Used by the trace options files */
9289 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9290 		tr->trace_flags_index[i] = i;
9291 }
9292 
9293 static void __update_tracer_options(struct trace_array *tr)
9294 {
9295 	struct tracer *t;
9296 
9297 	for (t = trace_types; t; t = t->next)
9298 		add_tracer_options(tr, t);
9299 }
9300 
9301 static void update_tracer_options(struct trace_array *tr)
9302 {
9303 	mutex_lock(&trace_types_lock);
9304 	tracer_options_updated = true;
9305 	__update_tracer_options(tr);
9306 	mutex_unlock(&trace_types_lock);
9307 }
9308 
9309 /* Must have trace_types_lock held */
9310 struct trace_array *trace_array_find(const char *instance)
9311 {
9312 	struct trace_array *tr, *found = NULL;
9313 
9314 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9315 		if (tr->name && strcmp(tr->name, instance) == 0) {
9316 			found = tr;
9317 			break;
9318 		}
9319 	}
9320 
9321 	return found;
9322 }
9323 
9324 struct trace_array *trace_array_find_get(const char *instance)
9325 {
9326 	struct trace_array *tr;
9327 
9328 	mutex_lock(&trace_types_lock);
9329 	tr = trace_array_find(instance);
9330 	if (tr)
9331 		tr->ref++;
9332 	mutex_unlock(&trace_types_lock);
9333 
9334 	return tr;
9335 }
9336 
9337 static int trace_array_create_dir(struct trace_array *tr)
9338 {
9339 	int ret;
9340 
9341 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9342 	if (!tr->dir)
9343 		return -EINVAL;
9344 
9345 	ret = event_trace_add_tracer(tr->dir, tr);
9346 	if (ret) {
9347 		tracefs_remove(tr->dir);
9348 		return ret;
9349 	}
9350 
9351 	init_tracer_tracefs(tr, tr->dir);
9352 	__update_tracer_options(tr);
9353 
9354 	return ret;
9355 }
9356 
9357 static struct trace_array *trace_array_create(const char *name)
9358 {
9359 	struct trace_array *tr;
9360 	int ret;
9361 
9362 	ret = -ENOMEM;
9363 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9364 	if (!tr)
9365 		return ERR_PTR(ret);
9366 
9367 	tr->name = kstrdup(name, GFP_KERNEL);
9368 	if (!tr->name)
9369 		goto out_free_tr;
9370 
9371 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9372 		goto out_free_tr;
9373 
9374 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9375 
9376 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9377 
9378 	raw_spin_lock_init(&tr->start_lock);
9379 
9380 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9381 
9382 	tr->current_trace = &nop_trace;
9383 
9384 	INIT_LIST_HEAD(&tr->systems);
9385 	INIT_LIST_HEAD(&tr->events);
9386 	INIT_LIST_HEAD(&tr->hist_vars);
9387 	INIT_LIST_HEAD(&tr->err_log);
9388 
9389 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9390 		goto out_free_tr;
9391 
9392 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9393 		goto out_free_tr;
9394 
9395 	ftrace_init_trace_array(tr);
9396 
9397 	init_trace_flags_index(tr);
9398 
9399 	if (trace_instance_dir) {
9400 		ret = trace_array_create_dir(tr);
9401 		if (ret)
9402 			goto out_free_tr;
9403 	} else
9404 		__trace_early_add_events(tr);
9405 
9406 	list_add(&tr->list, &ftrace_trace_arrays);
9407 
9408 	tr->ref++;
9409 
9410 	return tr;
9411 
9412  out_free_tr:
9413 	ftrace_free_ftrace_ops(tr);
9414 	free_trace_buffers(tr);
9415 	free_cpumask_var(tr->tracing_cpumask);
9416 	kfree(tr->name);
9417 	kfree(tr);
9418 
9419 	return ERR_PTR(ret);
9420 }
9421 
9422 static int instance_mkdir(const char *name)
9423 {
9424 	struct trace_array *tr;
9425 	int ret;
9426 
9427 	mutex_lock(&event_mutex);
9428 	mutex_lock(&trace_types_lock);
9429 
9430 	ret = -EEXIST;
9431 	if (trace_array_find(name))
9432 		goto out_unlock;
9433 
9434 	tr = trace_array_create(name);
9435 
9436 	ret = PTR_ERR_OR_ZERO(tr);
9437 
9438 out_unlock:
9439 	mutex_unlock(&trace_types_lock);
9440 	mutex_unlock(&event_mutex);
9441 	return ret;
9442 }
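
/*
 * Usage sketch (not from the original file): this is the tracefs callback
 * behind creating an instance from user space, e.g.
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 *
 * which lands in trace_array_create() above and gives "foo" its own ring
 * buffer, events/ directory and option files. An rmdir on the same path
 * goes through instance_rmdir() further below.
 */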
9443 
9444 /**
9445  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9446  * @name: The name of the trace array to be looked up/created.
9447  *
9448  * Returns a pointer to the trace array with the given name, or NULL if
9449  * it cannot be found or created.
9450  *
9451  * NOTE: This function increments the reference counter associated with the
9452  * trace array returned. This makes sure it cannot be freed while in use.
9453  * Use trace_array_put() once the trace array is no longer needed.
9454  * If the trace_array is to be freed, trace_array_destroy() needs to
9455  * be called after the trace_array_put(), or simply let user space delete
9456  * it from the tracefs instances directory. But until the
9457  * trace_array_put() is called, user space cannot delete it.
9458  *
9459  */
9460 struct trace_array *trace_array_get_by_name(const char *name)
9461 {
9462 	struct trace_array *tr;
9463 
9464 	mutex_lock(&event_mutex);
9465 	mutex_lock(&trace_types_lock);
9466 
9467 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9468 		if (tr->name && strcmp(tr->name, name) == 0)
9469 			goto out_unlock;
9470 	}
9471 
9472 	tr = trace_array_create(name);
9473 
9474 	if (IS_ERR(tr))
9475 		tr = NULL;
9476 out_unlock:
9477 	if (tr)
9478 		tr->ref++;
9479 
9480 	mutex_unlock(&trace_types_lock);
9481 	mutex_unlock(&event_mutex);
9482 	return tr;
9483 }
9484 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
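
/*
 * A minimal in-kernel usage sketch (not from the original file; the instance
 * name and event are only examples):
 *
 *   struct trace_array *tr;
 *
 *   tr = trace_array_get_by_name("my_instance");
 *   if (tr) {
 *           trace_array_set_clr_event(tr, "sched", "sched_switch", true);
 *           trace_array_printk(tr, _THIS_IP_, "hello from my module\n");
 *           trace_array_put(tr);
 *           // trace_array_destroy(tr) only if the module created the
 *           // instance and wants it removed again
 *   }
 */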
9485 
9486 static int __remove_instance(struct trace_array *tr)
9487 {
9488 	int i;
9489 
9490 	/* Reference counter for a newly created trace array = 1. */
9491 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9492 		return -EBUSY;
9493 
9494 	list_del(&tr->list);
9495 
9496 	/* Disable all the flags that were enabled coming in */
9497 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9498 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9499 			set_tracer_flag(tr, 1 << i, 0);
9500 	}
9501 
9502 	tracing_set_nop(tr);
9503 	clear_ftrace_function_probes(tr);
9504 	event_trace_del_tracer(tr);
9505 	ftrace_clear_pids(tr);
9506 	ftrace_destroy_function_files(tr);
9507 	tracefs_remove(tr->dir);
9508 	free_percpu(tr->last_func_repeats);
9509 	free_trace_buffers(tr);
9510 	clear_tracing_err_log(tr);
9511 
9512 	for (i = 0; i < tr->nr_topts; i++) {
9513 		kfree(tr->topts[i].topts);
9514 	}
9515 	kfree(tr->topts);
9516 
9517 	free_cpumask_var(tr->tracing_cpumask);
9518 	kfree(tr->name);
9519 	kfree(tr);
9520 
9521 	return 0;
9522 }
9523 
9524 int trace_array_destroy(struct trace_array *this_tr)
9525 {
9526 	struct trace_array *tr;
9527 	int ret;
9528 
9529 	if (!this_tr)
9530 		return -EINVAL;
9531 
9532 	mutex_lock(&event_mutex);
9533 	mutex_lock(&trace_types_lock);
9534 
9535 	ret = -ENODEV;
9536 
9537 	/* Making sure trace array exists before destroying it. */
9538 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9539 		if (tr == this_tr) {
9540 			ret = __remove_instance(tr);
9541 			break;
9542 		}
9543 	}
9544 
9545 	mutex_unlock(&trace_types_lock);
9546 	mutex_unlock(&event_mutex);
9547 
9548 	return ret;
9549 }
9550 EXPORT_SYMBOL_GPL(trace_array_destroy);
9551 
9552 static int instance_rmdir(const char *name)
9553 {
9554 	struct trace_array *tr;
9555 	int ret;
9556 
9557 	mutex_lock(&event_mutex);
9558 	mutex_lock(&trace_types_lock);
9559 
9560 	ret = -ENODEV;
9561 	tr = trace_array_find(name);
9562 	if (tr)
9563 		ret = __remove_instance(tr);
9564 
9565 	mutex_unlock(&trace_types_lock);
9566 	mutex_unlock(&event_mutex);
9567 
9568 	return ret;
9569 }
9570 
9571 static __init void create_trace_instances(struct dentry *d_tracer)
9572 {
9573 	struct trace_array *tr;
9574 
9575 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9576 							 instance_mkdir,
9577 							 instance_rmdir);
9578 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9579 		return;
9580 
9581 	mutex_lock(&event_mutex);
9582 	mutex_lock(&trace_types_lock);
9583 
9584 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9585 		if (!tr->name)
9586 			continue;
9587 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9588 			     "Failed to create instance directory\n"))
9589 			break;
9590 	}
9591 
9592 	mutex_unlock(&trace_types_lock);
9593 	mutex_unlock(&event_mutex);
9594 }
9595 
9596 static void
9597 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9598 {
9599 	struct trace_event_file *file;
9600 	int cpu;
9601 
9602 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9603 			tr, &show_traces_fops);
9604 
9605 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9606 			tr, &set_tracer_fops);
9607 
9608 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9609 			  tr, &tracing_cpumask_fops);
9610 
9611 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9612 			  tr, &tracing_iter_fops);
9613 
9614 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9615 			  tr, &tracing_fops);
9616 
9617 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9618 			  tr, &tracing_pipe_fops);
9619 
9620 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9621 			  tr, &tracing_entries_fops);
9622 
9623 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9624 			  tr, &tracing_total_entries_fops);
9625 
9626 	trace_create_file("free_buffer", 0200, d_tracer,
9627 			  tr, &tracing_free_buffer_fops);
9628 
9629 	trace_create_file("trace_marker", 0220, d_tracer,
9630 			  tr, &tracing_mark_fops);
9631 
9632 	file = __find_event_file(tr, "ftrace", "print");
9633 	if (file && file->dir)
9634 		trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9635 				  file, &event_trigger_fops);
9636 	tr->trace_marker_file = file;
9637 
9638 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9639 			  tr, &tracing_mark_raw_fops);
9640 
9641 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9642 			  &trace_clock_fops);
9643 
9644 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9645 			  tr, &rb_simple_fops);
9646 
9647 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9648 			  &trace_time_stamp_mode_fops);
9649 
9650 	tr->buffer_percent = 50;
9651 
9652 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9653 			tr, &buffer_percent_fops);
9654 
9655 	create_trace_options_dir(tr);
9656 
9657 #ifdef CONFIG_TRACER_MAX_TRACE
9658 	trace_create_maxlat_file(tr, d_tracer);
9659 #endif
9660 
9661 	if (ftrace_create_function_files(tr, d_tracer))
9662 		MEM_FAIL(1, "Could not allocate function filter files");
9663 
9664 #ifdef CONFIG_TRACER_SNAPSHOT
9665 	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9666 			  tr, &snapshot_fops);
9667 #endif
9668 
9669 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9670 			  tr, &tracing_err_log_fops);
9671 
9672 	for_each_tracing_cpu(cpu)
9673 		tracing_init_tracefs_percpu(tr, cpu);
9674 
9675 	ftrace_init_tracefs(tr, d_tracer);
9676 }
9677 
9678 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9679 {
9680 	struct vfsmount *mnt;
9681 	struct file_system_type *type;
9682 
9683 	/*
9684 	 * To maintain backward compatibility for tools that mount
9685 	 * debugfs to get to the tracing facility, tracefs is automatically
9686 	 * mounted to the debugfs/tracing directory.
9687 	 */
9688 	type = get_fs_type("tracefs");
9689 	if (!type)
9690 		return NULL;
9691 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9692 	put_filesystem(type);
9693 	if (IS_ERR(mnt))
9694 		return NULL;
9695 	mntget(mnt);
9696 
9697 	return mnt;
9698 }
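
/*
 * Illustrative note (not from the original file): with this automount in
 * place, the legacy path keeps working; the first access to
 * /sys/kernel/debug/tracing transparently mounts tracefs there, so both
 *
 *   ls /sys/kernel/debug/tracing/
 *   ls /sys/kernel/tracing/
 *
 * show the same files.
 */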
9699 
9700 /**
9701  * tracing_init_dentry - initialize top level trace array
9702  *
9703  * This is called when creating files or directories in the tracing
9704  * directory. It is called via fs_initcall() by any of the boot up code
9705  * and returns zero on success, or a negative error code otherwise.
9706  */
9707 int tracing_init_dentry(void)
9708 {
9709 	struct trace_array *tr = &global_trace;
9710 
9711 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9712 		pr_warn("Tracing disabled due to lockdown\n");
9713 		return -EPERM;
9714 	}
9715 
9716 	/* The top level trace array uses  NULL as parent */
9717 	if (tr->dir)
9718 		return 0;
9719 
9720 	if (WARN_ON(!tracefs_initialized()))
9721 		return -ENODEV;
9722 
9723 	/*
9724 	 * As there may still be users that expect the tracing
9725 	 * files to exist in debugfs/tracing, we must automount
9726 	 * the tracefs file system there, so older tools still
9727 	 * work with the newer kernel.
9728 	 */
9729 	tr->dir = debugfs_create_automount("tracing", NULL,
9730 					   trace_automount, NULL);
9731 
9732 	return 0;
9733 }
9734 
9735 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9736 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9737 
9738 static struct workqueue_struct *eval_map_wq __initdata;
9739 static struct work_struct eval_map_work __initdata;
9740 
9741 static void __init eval_map_work_func(struct work_struct *work)
9742 {
9743 	int len;
9744 
9745 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9746 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9747 }
9748 
9749 static int __init trace_eval_init(void)
9750 {
9751 	INIT_WORK(&eval_map_work, eval_map_work_func);
9752 
9753 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9754 	if (!eval_map_wq) {
9755 		pr_err("Unable to allocate eval_map_wq\n");
9756 		/* Do work here */
9757 		eval_map_work_func(&eval_map_work);
9758 		return -ENOMEM;
9759 	}
9760 
9761 	queue_work(eval_map_wq, &eval_map_work);
9762 	return 0;
9763 }
9764 
9765 static int __init trace_eval_sync(void)
9766 {
9767 	/* Make sure the eval map updates are finished */
9768 	if (eval_map_wq)
9769 		destroy_workqueue(eval_map_wq);
9770 	return 0;
9771 }
9772 
9773 late_initcall_sync(trace_eval_sync);
9774 
9775 
9776 #ifdef CONFIG_MODULES
9777 static void trace_module_add_evals(struct module *mod)
9778 {
9779 	if (!mod->num_trace_evals)
9780 		return;
9781 
9782 	/*
9783 	 * Modules with bad taint do not have events created, do
9784 	 * not bother with enums either.
9785 	 */
9786 	if (trace_module_has_bad_taint(mod))
9787 		return;
9788 
9789 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9790 }
9791 
9792 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9793 static void trace_module_remove_evals(struct module *mod)
9794 {
9795 	union trace_eval_map_item *map;
9796 	union trace_eval_map_item **last = &trace_eval_maps;
9797 
9798 	if (!mod->num_trace_evals)
9799 		return;
9800 
9801 	mutex_lock(&trace_eval_mutex);
9802 
9803 	map = trace_eval_maps;
9804 
9805 	while (map) {
9806 		if (map->head.mod == mod)
9807 			break;
9808 		map = trace_eval_jmp_to_tail(map);
9809 		last = &map->tail.next;
9810 		map = map->tail.next;
9811 	}
9812 	if (!map)
9813 		goto out;
9814 
9815 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9816 	kfree(map);
9817  out:
9818 	mutex_unlock(&trace_eval_mutex);
9819 }
9820 #else
9821 static inline void trace_module_remove_evals(struct module *mod) { }
9822 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9823 
9824 static int trace_module_notify(struct notifier_block *self,
9825 			       unsigned long val, void *data)
9826 {
9827 	struct module *mod = data;
9828 
9829 	switch (val) {
9830 	case MODULE_STATE_COMING:
9831 		trace_module_add_evals(mod);
9832 		break;
9833 	case MODULE_STATE_GOING:
9834 		trace_module_remove_evals(mod);
9835 		break;
9836 	}
9837 
9838 	return NOTIFY_OK;
9839 }
9840 
9841 static struct notifier_block trace_module_nb = {
9842 	.notifier_call = trace_module_notify,
9843 	.priority = 0,
9844 };
9845 #endif /* CONFIG_MODULES */
9846 
9847 static __init int tracer_init_tracefs(void)
9848 {
9849 	int ret;
9850 
9851 	trace_access_lock_init();
9852 
9853 	ret = tracing_init_dentry();
9854 	if (ret)
9855 		return 0;
9856 
9857 	event_trace_init();
9858 
9859 	init_tracer_tracefs(&global_trace, NULL);
9860 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9861 
9862 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9863 			&global_trace, &tracing_thresh_fops);
9864 
9865 	trace_create_file("README", TRACE_MODE_READ, NULL,
9866 			NULL, &tracing_readme_fops);
9867 
9868 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9869 			NULL, &tracing_saved_cmdlines_fops);
9870 
9871 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9872 			  NULL, &tracing_saved_cmdlines_size_fops);
9873 
9874 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9875 			NULL, &tracing_saved_tgids_fops);
9876 
9877 	trace_eval_init();
9878 
9879 	trace_create_eval_file(NULL);
9880 
9881 #ifdef CONFIG_MODULES
9882 	register_module_notifier(&trace_module_nb);
9883 #endif
9884 
9885 #ifdef CONFIG_DYNAMIC_FTRACE
9886 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9887 			NULL, &tracing_dyn_info_fops);
9888 #endif
9889 
9890 	create_trace_instances(NULL);
9891 
9892 	update_tracer_options(&global_trace);
9893 
9894 	return 0;
9895 }
9896 
9897 fs_initcall(tracer_init_tracefs);
9898 
9899 static int trace_panic_handler(struct notifier_block *this,
9900 			       unsigned long event, void *unused)
9901 {
9902 	bool ftrace_check = false;
9903 
9904 	trace_android_vh_ftrace_oops_enter(&ftrace_check);
9905 
9906 	if (ftrace_check)
9907 		return NOTIFY_OK;
9908 
9909 	if (ftrace_dump_on_oops)
9910 		ftrace_dump(ftrace_dump_on_oops);
9911 
9912 	trace_android_vh_ftrace_oops_exit(&ftrace_check);
9913 	return NOTIFY_OK;
9914 }
9915 
9916 static struct notifier_block trace_panic_notifier = {
9917 	.notifier_call  = trace_panic_handler,
9918 	.next           = NULL,
9919 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9920 };
9921 
9922 static int trace_die_handler(struct notifier_block *self,
9923 			     unsigned long val,
9924 			     void *data)
9925 {
9926 	bool ftrace_check = false;
9927 
9928 	trace_android_vh_ftrace_oops_enter(&ftrace_check);
9929 
9930 	if (ftrace_check)
9931 		return NOTIFY_OK;
9932 
9933 	switch (val) {
9934 	case DIE_OOPS:
9935 		if (ftrace_dump_on_oops)
9936 			ftrace_dump(ftrace_dump_on_oops);
9937 		break;
9938 	default:
9939 		break;
9940 	}
9941 
9942 	trace_android_vh_ftrace_oops_exit(&ftrace_check);
9943 	return NOTIFY_OK;
9944 }
9945 
9946 static struct notifier_block trace_die_notifier = {
9947 	.notifier_call = trace_die_handler,
9948 	.priority = 200
9949 };
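
/*
 * Illustrative note (not from the original file): the two notifiers above
 * only dump the ring buffer when ftrace_dump_on_oops is set, e.g. on the
 * kernel command line:
 *
 *   ftrace_dump_on_oops           # dump all CPU buffers on an oops/panic
 *   ftrace_dump_on_oops=orig_cpu  # dump only the CPU that oopsed
 *
 * or at run time via /proc/sys/kernel/ftrace_dump_on_oops.
 */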
9950 
9951 /*
9952  * printk is set to max of 1024, we really don't need it that big.
9953  * Nothing should be printing 1000 characters anyway.
9954  */
9955 #define TRACE_MAX_PRINT		1000
9956 
9957 /*
9958  * Define here KERN_TRACE so that we have one place to modify
9959  * it if we decide to change what log level the ftrace dump
9960  * should be at.
9961  */
9962 #define KERN_TRACE		KERN_EMERG
9963 
9964 void
9965 trace_printk_seq(struct trace_seq *s)
9966 {
9967 	bool dump_printk = true;
9968 
9969 	/* Probably should print a warning here. */
9970 	if (s->seq.len >= TRACE_MAX_PRINT)
9971 		s->seq.len = TRACE_MAX_PRINT;
9972 
9973 	/*
9974 	 * More paranoid code. Although the buffer size is set to
9975 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9976 	 * an extra layer of protection.
9977 	 */
9978 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9979 		s->seq.len = s->seq.size - 1;
9980 
9981 	/* should be zero ended, but we are paranoid. */
9982 	s->buffer[s->seq.len] = 0;
9983 
9984 	trace_android_vh_ftrace_dump_buffer(s, &dump_printk);
9985 	if (dump_printk)
9986 		printk(KERN_TRACE "%s", s->buffer);
9987 
9988 	trace_seq_init(s);
9989 }
9990 
9991 void trace_init_global_iter(struct trace_iterator *iter)
9992 {
9993 	iter->tr = &global_trace;
9994 	iter->trace = iter->tr->current_trace;
9995 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9996 	iter->array_buffer = &global_trace.array_buffer;
9997 
9998 	if (iter->trace && iter->trace->open)
9999 		iter->trace->open(iter);
10000 
10001 	/* Annotate start of buffers if we had overruns */
10002 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10003 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10004 
10005 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10006 	if (trace_clocks[iter->tr->clock_id].in_ns)
10007 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10008 
10009 	/* Can not use kmalloc for iter.temp and iter.fmt */
10010 	iter->temp = static_temp_buf;
10011 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10012 	iter->fmt = static_fmt_buf;
10013 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10014 }
10015 
10016 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10017 {
10018 	/* use static because iter can be a bit big for the stack */
10019 	static struct trace_iterator iter;
10020 	static atomic_t dump_running;
10021 	struct trace_array *tr = &global_trace;
10022 	unsigned int old_userobj;
10023 	unsigned long flags;
10024 	int cnt = 0, cpu;
10025 	bool ftrace_check = false;
10026 	unsigned long size;
10027 
10028 	/* Only allow one dump user at a time. */
10029 	if (atomic_inc_return(&dump_running) != 1) {
10030 		atomic_dec(&dump_running);
10031 		return;
10032 	}
10033 
10034 	/*
10035 	 * Always turn off tracing when we dump.
10036 	 * We don't need to show trace output of what happens
10037 	 * between multiple crashes.
10038 	 *
10039 	 * If the user does a sysrq-z, then they can re-enable
10040 	 * tracing with echo 1 > tracing_on.
10041 	 */
10042 	tracing_off();
10043 
10044 	local_irq_save(flags);
10045 
10046 	/* Simulate the iterator */
10047 	trace_init_global_iter(&iter);
10048 
10049 	for_each_tracing_cpu(cpu) {
10050 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10051 		size = ring_buffer_size(iter.array_buffer->buffer, cpu);
10052 		trace_android_vh_ftrace_size_check(size, &ftrace_check);
10053 	}
10054 
10055 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10056 
10057 	/* don't look at user memory in panic mode */
10058 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10059 
10060 	if (ftrace_check)
10061 		goto out_enable;
10062 
10063 	switch (oops_dump_mode) {
10064 	case DUMP_ALL:
10065 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10066 		break;
10067 	case DUMP_ORIG:
10068 		iter.cpu_file = raw_smp_processor_id();
10069 		break;
10070 	case DUMP_NONE:
10071 		goto out_enable;
10072 	default:
10073 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10074 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10075 	}
10076 
10077 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
10078 
10079 	/* Did function tracer already get disabled? */
10080 	if (ftrace_is_dead()) {
10081 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10082 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10083 	}
10084 
10085 	/*
10086 	 * We need to stop all tracing on all CPUs to read
10087 	 * the next buffer. This is a bit expensive, but is
10088 	 * not done often. We fill in all that we can read,
10089 	 * and then release the locks again.
10090 	 */
10091 
10092 	while (!trace_empty(&iter)) {
10093 		ftrace_check = true;
10094 
10095 		if (!cnt)
10096 			printk(KERN_TRACE "---------------------------------\n");
10097 
10098 		cnt++;
10099 
10100 		trace_iterator_reset(&iter);
10101 		trace_android_vh_ftrace_format_check(&ftrace_check);
10102 		if (ftrace_check)
10103 			iter.iter_flags |= TRACE_FILE_LAT_FMT;
10104 
10105 		if (trace_find_next_entry_inc(&iter) != NULL) {
10106 			int ret;
10107 
10108 			ret = print_trace_line(&iter);
10109 			if (ret != TRACE_TYPE_NO_CONSUME)
10110 				trace_consume(&iter);
10111 		}
10112 		touch_nmi_watchdog();
10113 
10114 		trace_printk_seq(&iter.seq);
10115 	}
10116 
10117 	if (!cnt)
10118 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10119 	else
10120 		printk(KERN_TRACE "---------------------------------\n");
10121 
10122  out_enable:
10123 	tr->trace_flags |= old_userobj;
10124 
10125 	for_each_tracing_cpu(cpu) {
10126 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10127 	}
10128 	atomic_dec(&dump_running);
10129 	local_irq_restore(flags);
10130 }
10131 EXPORT_SYMBOL_GPL(ftrace_dump);
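
/*
 * Illustrative note (not from the original file): besides the oops/panic
 * notifiers above, ftrace_dump() can be triggered by hand with the magic
 * SysRq 'z' key (echo z > /proc/sysrq-trigger), which is why the comment in
 * the function mentions re-enabling tracing afterwards with
 * "echo 1 > tracing_on".
 */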
10132 
10133 #define WRITE_BUFSIZE  4096
10134 
10135 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10136 				size_t count, loff_t *ppos,
10137 				int (*createfn)(const char *))
10138 {
10139 	char *kbuf, *buf, *tmp;
10140 	int ret = 0;
10141 	size_t done = 0;
10142 	size_t size;
10143 
10144 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10145 	if (!kbuf)
10146 		return -ENOMEM;
10147 
10148 	while (done < count) {
10149 		size = count - done;
10150 
10151 		if (size >= WRITE_BUFSIZE)
10152 			size = WRITE_BUFSIZE - 1;
10153 
10154 		if (copy_from_user(kbuf, buffer + done, size)) {
10155 			ret = -EFAULT;
10156 			goto out;
10157 		}
10158 		kbuf[size] = '\0';
10159 		buf = kbuf;
10160 		do {
10161 			tmp = strchr(buf, '\n');
10162 			if (tmp) {
10163 				*tmp = '\0';
10164 				size = tmp - buf + 1;
10165 			} else {
10166 				size = strlen(buf);
10167 				if (done + size < count) {
10168 					if (buf != kbuf)
10169 						break;
10170 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10171 					pr_warn("Line length is too long: Should be less than %d\n",
10172 						WRITE_BUFSIZE - 2);
10173 					ret = -EINVAL;
10174 					goto out;
10175 				}
10176 			}
10177 			done += size;
10178 
10179 			/* Remove comments */
10180 			tmp = strchr(buf, '#');
10181 
10182 			if (tmp)
10183 				*tmp = '\0';
10184 
10185 			ret = createfn(buf);
10186 			if (ret)
10187 				goto out;
10188 			buf += size;
10189 
10190 		} while (done < count);
10191 	}
10192 	ret = done;
10193 
10194 out:
10195 	kfree(kbuf);
10196 
10197 	return ret;
10198 }
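
/*
 * Usage sketch (not from the original file; the probe definition is only an
 * example): this helper backs command-style tracefs files such as
 * kprobe_events and dynamic_events, where every '\n'-terminated line is
 * handed to createfn and '#' starts a comment, e.g.
 *
 *   echo 'p:myprobe do_sys_openat2   # trailing comment is stripped' \
 *        >> /sys/kernel/tracing/kprobe_events
 */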
10199 
10200 __init static int tracer_alloc_buffers(void)
10201 {
10202 	int ring_buf_size;
10203 	int ret = -ENOMEM;
10204 
10205 
10206 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10207 		pr_warn("Tracing disabled due to lockdown\n");
10208 		return -EPERM;
10209 	}
10210 
10211 	/*
10212 	 * Make sure we don't accidentally add more trace options
10213 	 * than we have bits for.
10214 	 */
10215 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10216 
10217 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10218 		goto out;
10219 
10220 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10221 		goto out_free_buffer_mask;
10222 
10223 	/* Only allocate trace_printk buffers if a trace_printk exists */
10224 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10225 		/* Must be called before global_trace.buffer is allocated */
10226 		trace_printk_init_buffers();
10227 
10228 	/* To save memory, keep the ring buffer size to its minimum */
10229 	if (ring_buffer_expanded)
10230 		ring_buf_size = trace_buf_size;
10231 	else
10232 		ring_buf_size = 1;
10233 
10234 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10235 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10236 
10237 	raw_spin_lock_init(&global_trace.start_lock);
10238 
10239 	/*
10240 	 * The prepare callback allocates some memory for the ring buffer. We
10241 	 * don't free the buffer if the CPU goes down. If we were to free
10242 	 * the buffer, then the user would lose any trace that was in the
10243 	 * buffer. The memory will be removed once the "instance" is removed.
10244 	 */
10245 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10246 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10247 				      NULL);
10248 	if (ret < 0)
10249 		goto out_free_cpumask;
10250 	/* Used for event triggers */
10251 	ret = -ENOMEM;
10252 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10253 	if (!temp_buffer)
10254 		goto out_rm_hp_state;
10255 
10256 	if (trace_create_savedcmd() < 0)
10257 		goto out_free_temp_buffer;
10258 
10259 	/* TODO: make the number of buffers hot pluggable with CPUS */
10260 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10261 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10262 		goto out_free_savedcmd;
10263 	}
10264 
10265 	if (global_trace.buffer_disabled)
10266 		tracing_off();
10267 
10268 	if (trace_boot_clock) {
10269 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10270 		if (ret < 0)
10271 			pr_warn("Trace clock %s not defined, going back to default\n",
10272 				trace_boot_clock);
10273 	}
10274 
10275 	/*
10276 	 * register_tracer() might reference current_trace, so it
10277 	 * needs to be set before we register anything. This is
10278 	 * just a bootstrap of current_trace anyway.
10279 	 */
10280 	global_trace.current_trace = &nop_trace;
10281 
10282 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10283 
10284 	ftrace_init_global_array_ops(&global_trace);
10285 
10286 	init_trace_flags_index(&global_trace);
10287 
10288 	register_tracer(&nop_trace);
10289 
10290 	/* Function tracing may start here (via kernel command line) */
10291 	init_function_trace();
10292 
10293 	/* All seems OK, enable tracing */
10294 	tracing_disabled = 0;
10295 
10296 	atomic_notifier_chain_register(&panic_notifier_list,
10297 				       &trace_panic_notifier);
10298 
10299 	register_die_notifier(&trace_die_notifier);
10300 
10301 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10302 
10303 	INIT_LIST_HEAD(&global_trace.systems);
10304 	INIT_LIST_HEAD(&global_trace.events);
10305 	INIT_LIST_HEAD(&global_trace.hist_vars);
10306 	INIT_LIST_HEAD(&global_trace.err_log);
10307 	list_add(&global_trace.list, &ftrace_trace_arrays);
10308 
10309 	apply_trace_boot_options();
10310 
10311 	register_snapshot_cmd();
10312 
10313 	test_can_verify();
10314 
10315 	return 0;
10316 
10317 out_free_savedcmd:
10318 	free_saved_cmdlines_buffer(savedcmd);
10319 out_free_temp_buffer:
10320 	ring_buffer_free(temp_buffer);
10321 out_rm_hp_state:
10322 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10323 out_free_cpumask:
10324 	free_cpumask_var(global_trace.tracing_cpumask);
10325 out_free_buffer_mask:
10326 	free_cpumask_var(tracing_buffer_mask);
10327 out:
10328 	return ret;
10329 }
10330 
10331 void __init early_trace_init(void)
10332 {
10333 	if (tracepoint_printk) {
10334 		tracepoint_print_iter =
10335 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10336 		if (MEM_FAIL(!tracepoint_print_iter,
10337 			     "Failed to allocate trace iterator\n"))
10338 			tracepoint_printk = 0;
10339 		else
10340 			static_key_enable(&tracepoint_printk_key.key);
10341 	}
10342 	tracer_alloc_buffers();
10343 
10344 	init_events();
10345 }
10346 
10347 void __init trace_init(void)
10348 {
10349 	trace_event_init();
10350 }
10351 
10352 __init static void clear_boot_tracer(void)
10353 {
10354 	/*
10355 	 * The default bootup tracer string lives in an init section.
10356 	 * This function is called from a late initcall. If we did not
10357 	 * find the boot tracer, then clear the pointer out, to prevent
10358 	 * later registration from accessing memory that is
10359 	 * about to be freed.
10360 	 */
10361 	if (!default_bootup_tracer)
10362 		return;
10363 
10364 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10365 	       default_bootup_tracer);
10366 	default_bootup_tracer = NULL;
10367 }
10368 
10369 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10370 __init static void tracing_set_default_clock(void)
10371 {
10372 	/* sched_clock_stable() is determined in late_initcall */
10373 	if (!trace_boot_clock && !sched_clock_stable()) {
10374 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10375 			pr_warn("Can not set tracing clock due to lockdown\n");
10376 			return;
10377 		}
10378 
10379 		printk(KERN_WARNING
10380 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10381 		       "If you want to keep using the local clock, then add:\n"
10382 		       "  \"trace_clock=local\"\n"
10383 		       "on the kernel command line\n");
10384 		tracing_set_clock(&global_trace, "global");
10385 	}
10386 }
10387 #else
10388 static inline void tracing_set_default_clock(void) { }
10389 #endif
10390 
10391 __init static int late_trace_init(void)
10392 {
10393 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10394 		static_key_disable(&tracepoint_printk_key.key);
10395 		tracepoint_printk = 0;
10396 	}
10397 
10398 	tracing_set_default_clock();
10399 	clear_boot_tracer();
10400 	return 0;
10401 }
10402 
10403 late_initcall_sync(late_trace_init);
10404