1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include "trace.h"
53 #include "trace_output.h"
54 
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60 
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will look into the ring buffer to count the
64  * entries inserted during the selftest, although concurrent
65  * insertions into the ring buffer, such as trace_printk(), could
66  * occur at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75 
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79 	if (!tracing_selftest_disabled) {
80 		tracing_selftest_disabled = true;
81 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 	}
83 }
84 #endif
85 
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90 
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93 	{ }
94 };
95 
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99 	return 0;
100 }
101 
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108 
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116 
117 cpumask_var_t __read_mostly	tracing_buffer_mask;
118 
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops
131  * Set 1 if you want to dump buffers of all CPUs
132  * Set 2 if you want to dump the buffer of the CPU that triggered oops
133  */
134 
135 enum ftrace_dump_mode ftrace_dump_on_oops;
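/*
 * Illustrative examples of the knobs documented above (same paths and
 * values as described in the comment):
 *
 *	# dump all CPU buffers on an oops (kernel command line)
 *	ftrace_dump_on_oops
 *
 *	# dump only the buffer of the CPU that triggered the oops
 *	ftrace_dump_on_oops=orig_cpu
 *
 *	# enable at run time via the sysctl
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */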
136 
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139 
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143 	struct module			*mod;
144 	unsigned long			length;
145 };
146 
147 union trace_eval_map_item;
148 
149 struct trace_eval_map_tail {
150 	/*
151 	 * "end" is first and points to NULL as it must be different
152 	 * than "mod" or "eval_string"
153 	 */
154 	union trace_eval_map_item	*next;
155 	const char			*end;	/* points to NULL */
156 };
157 
158 static DEFINE_MUTEX(trace_eval_mutex);
159 
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168 	struct trace_eval_map		map;
169 	struct trace_eval_map_head	head;
170 	struct trace_eval_map_tail	tail;
171 };
172 
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175 
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178 				   struct trace_buffer *buffer,
179 				   unsigned long flags, int pc);
180 
181 #define MAX_TRACER_SIZE		100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184 
185 static bool allocate_snapshot;
186 
187 static int __init set_cmdline_ftrace(char *str)
188 {
189 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190 	default_bootup_tracer = bootup_tracer_buf;
191 	/* We are using ftrace early, expand it */
192 	ring_buffer_expanded = true;
193 	return 1;
194 }
195 __setup("ftrace=", set_cmdline_ftrace);
196 
197 static int __init set_ftrace_dump_on_oops(char *str)
198 {
199 	if (*str++ != '=' || !*str) {
200 		ftrace_dump_on_oops = DUMP_ALL;
201 		return 1;
202 	}
203 
204 	if (!strcmp("orig_cpu", str)) {
205 		ftrace_dump_on_oops = DUMP_ORIG;
206 		return 1;
207 	}
208 
209 	return 0;
210 }
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
212 
213 static int __init stop_trace_on_warning(char *str)
214 {
215 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216 		__disable_trace_on_warning = 1;
217 	return 1;
218 }
219 __setup("traceoff_on_warning", stop_trace_on_warning);
220 
221 static int __init boot_alloc_snapshot(char *str)
222 {
223 	allocate_snapshot = true;
224 	/* We also need the main ring buffer expanded */
225 	ring_buffer_expanded = true;
226 	return 1;
227 }
228 __setup("alloc_snapshot", boot_alloc_snapshot);
229 
230 
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
232 
233 static int __init set_trace_boot_options(char *str)
234 {
235 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
236 	return 1;
237 }
238 __setup("trace_options=", set_trace_boot_options);
239 
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
242 
243 static int __init set_trace_boot_clock(char *str)
244 {
245 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246 	trace_boot_clock = trace_boot_clock_buf;
247 	return 1;
248 }
249 __setup("trace_clock=", set_trace_boot_clock);
250 
251 static int __init set_tracepoint_printk(char *str)
252 {
253 	/* Ignore the "tp_printk_stop_on_boot" param */
254 	if (*str == '_')
255 		return 0;
256 
257 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
258 		tracepoint_printk = 1;
259 	return 1;
260 }
261 __setup("tp_printk", set_tracepoint_printk);
262 
263 unsigned long long ns2usecs(u64 nsec)
264 {
265 	nsec += 500;
266 	do_div(nsec, 1000);
267 	return nsec;
268 }
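/*
 * For example, ns2usecs(1499) == 1 and ns2usecs(1500) == 2: the +500
 * before the divide-by-1000 rounds to the nearest microsecond.
 */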
269 
270 static void
271 trace_process_export(struct trace_export *export,
272 	       struct ring_buffer_event *event, int flag)
273 {
274 	struct trace_entry *entry;
275 	unsigned int size = 0;
276 
277 	if (export->flags & flag) {
278 		entry = ring_buffer_event_data(event);
279 		size = ring_buffer_event_length(event);
280 		export->write(export, entry, size);
281 	}
282 }
283 
284 static DEFINE_MUTEX(ftrace_export_lock);
285 
286 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
287 
288 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
289 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
290 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
291 
292 static inline void ftrace_exports_enable(struct trace_export *export)
293 {
294 	if (export->flags & TRACE_EXPORT_FUNCTION)
295 		static_branch_inc(&trace_function_exports_enabled);
296 
297 	if (export->flags & TRACE_EXPORT_EVENT)
298 		static_branch_inc(&trace_event_exports_enabled);
299 
300 	if (export->flags & TRACE_EXPORT_MARKER)
301 		static_branch_inc(&trace_marker_exports_enabled);
302 }
303 
304 static inline void ftrace_exports_disable(struct trace_export *export)
305 {
306 	if (export->flags & TRACE_EXPORT_FUNCTION)
307 		static_branch_dec(&trace_function_exports_enabled);
308 
309 	if (export->flags & TRACE_EXPORT_EVENT)
310 		static_branch_dec(&trace_event_exports_enabled);
311 
312 	if (export->flags & TRACE_EXPORT_MARKER)
313 		static_branch_dec(&trace_marker_exports_enabled);
314 }
315 
316 static void ftrace_exports(struct ring_buffer_event *event, int flag)
317 {
318 	struct trace_export *export;
319 
320 	preempt_disable_notrace();
321 
322 	export = rcu_dereference_raw_check(ftrace_exports_list);
323 	while (export) {
324 		trace_process_export(export, event, flag);
325 		export = rcu_dereference_raw_check(export->next);
326 	}
327 
328 	preempt_enable_notrace();
329 }
330 
331 static inline void
332 add_trace_export(struct trace_export **list, struct trace_export *export)
333 {
334 	rcu_assign_pointer(export->next, *list);
335 	/*
336 	 * We are entering export into the list but another
337 	 * CPU might be walking that list. We need to make sure
338 	 * the export->next pointer is valid before another CPU sees
339 	 * the export pointer included into the list.
340 	 */
341 	rcu_assign_pointer(*list, export);
342 }
343 
344 static inline int
345 rm_trace_export(struct trace_export **list, struct trace_export *export)
346 {
347 	struct trace_export **p;
348 
349 	for (p = list; *p != NULL; p = &(*p)->next)
350 		if (*p == export)
351 			break;
352 
353 	if (*p != export)
354 		return -1;
355 
356 	rcu_assign_pointer(*p, (*p)->next);
357 
358 	return 0;
359 }
360 
361 static inline void
362 add_ftrace_export(struct trace_export **list, struct trace_export *export)
363 {
364 	ftrace_exports_enable(export);
365 
366 	add_trace_export(list, export);
367 }
368 
369 static inline int
370 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
371 {
372 	int ret;
373 
374 	ret = rm_trace_export(list, export);
375 	ftrace_exports_disable(export);
376 
377 	return ret;
378 }
379 
380 int register_ftrace_export(struct trace_export *export)
381 {
382 	if (WARN_ON_ONCE(!export->write))
383 		return -1;
384 
385 	mutex_lock(&ftrace_export_lock);
386 
387 	add_ftrace_export(&ftrace_exports_list, export);
388 
389 	mutex_unlock(&ftrace_export_lock);
390 
391 	return 0;
392 }
393 EXPORT_SYMBOL_GPL(register_ftrace_export);
394 
395 int unregister_ftrace_export(struct trace_export *export)
396 {
397 	int ret;
398 
399 	mutex_lock(&ftrace_export_lock);
400 
401 	ret = rm_ftrace_export(&ftrace_exports_list, export);
402 
403 	mutex_unlock(&ftrace_export_lock);
404 
405 	return ret;
406 }
407 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
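/*
 * A minimal sketch of how a module might use the export hooks above,
 * assuming the struct trace_export callback prototype from
 * <linux/trace.h>; my_export_write() and my_backend_push() are purely
 * illustrative names:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		my_backend_push(entry, size);	// hypothetical backend
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	if (register_ftrace_export(&my_export))
 *		pr_warn("failed to register trace export\n");
 *	...
 *	unregister_ftrace_export(&my_export);
 */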
408 
409 /* trace_flags holds trace_options default values */
410 #define TRACE_DEFAULT_FLAGS						\
411 	(FUNCTION_DEFAULT_FLAGS |					\
412 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
413 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
414 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
415 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
416 
417 /* trace_options that are only supported by global_trace */
418 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
419 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
420 
421 /* trace_flags that are default zero for instances */
422 #define ZEROED_TRACE_FLAGS \
423 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
424 
425 /*
426  * The global_trace is the descriptor that holds the top-level tracing
427  * buffers for the live tracing.
428  */
429 static struct trace_array global_trace = {
430 	.trace_flags = TRACE_DEFAULT_FLAGS,
431 };
432 
433 LIST_HEAD(ftrace_trace_arrays);
434 
435 int trace_array_get(struct trace_array *this_tr)
436 {
437 	struct trace_array *tr;
438 	int ret = -ENODEV;
439 
440 	mutex_lock(&trace_types_lock);
441 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
442 		if (tr == this_tr) {
443 			tr->ref++;
444 			ret = 0;
445 			break;
446 		}
447 	}
448 	mutex_unlock(&trace_types_lock);
449 
450 	return ret;
451 }
452 
453 static void __trace_array_put(struct trace_array *this_tr)
454 {
455 	WARN_ON(!this_tr->ref);
456 	this_tr->ref--;
457 }
458 
459 /**
460  * trace_array_put - Decrement the reference counter for this trace array.
461  *
462  * NOTE: Use this when we no longer need the trace array returned by
463  * trace_array_get_by_name(). This ensures the trace array can be later
464  * destroyed.
465  *
466  */
467 void trace_array_put(struct trace_array *this_tr)
468 {
469 	if (!this_tr)
470 		return;
471 
472 	mutex_lock(&trace_types_lock);
473 	__trace_array_put(this_tr);
474 	mutex_unlock(&trace_types_lock);
475 }
476 EXPORT_SYMBOL_GPL(trace_array_put);
477 
478 int tracing_check_open_get_tr(struct trace_array *tr)
479 {
480 	int ret;
481 
482 	ret = security_locked_down(LOCKDOWN_TRACEFS);
483 	if (ret)
484 		return ret;
485 
486 	if (tracing_disabled)
487 		return -ENODEV;
488 
489 	if (tr && trace_array_get(tr) < 0)
490 		return -ENODEV;
491 
492 	return 0;
493 }
494 
495 int call_filter_check_discard(struct trace_event_call *call, void *rec,
496 			      struct trace_buffer *buffer,
497 			      struct ring_buffer_event *event)
498 {
499 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
500 	    !filter_match_preds(call->filter, rec)) {
501 		__trace_event_discard_commit(buffer, event);
502 		return 1;
503 	}
504 
505 	return 0;
506 }
507 
508 void trace_free_pid_list(struct trace_pid_list *pid_list)
509 {
510 	vfree(pid_list->pids);
511 	kfree(pid_list);
512 }
513 
514 /**
515  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
516  * @filtered_pids: The list of pids to check
517  * @search_pid: The PID to find in @filtered_pids
518  *
519  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
520  */
521 bool
522 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
523 {
524 	/*
525 	 * If pid_max changed after filtered_pids was created, we
526 	 * by default ignore all pids greater than the previous pid_max.
527 	 */
528 	if (search_pid >= filtered_pids->pid_max)
529 		return false;
530 
531 	return test_bit(search_pid, filtered_pids->pids);
532 }
533 
534 /**
535  * trace_ignore_this_task - should a task be ignored for tracing
536  * @filtered_pids: The list of pids to check
537  * @task: The task that should be ignored if not filtered
538  *
539  * Checks if @task should be traced or not from @filtered_pids.
540  * Returns true if @task should *NOT* be traced.
541  * Returns false if @task should be traced.
542  */
543 bool
544 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
545 		       struct trace_pid_list *filtered_no_pids,
546 		       struct task_struct *task)
547 {
548 	/*
549 	 * If filtered_no_pids is not empty, and the task's pid is listed
550 	 * in filtered_no_pids, then return true.
551 	 * Otherwise, if filtered_pids is empty, that means we can
552 	 * trace all tasks. If it has content, then only trace pids
553 	 * within filtered_pids.
554 	 */
555 
556 	return (filtered_pids &&
557 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
558 		(filtered_no_pids &&
559 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
560 }
561 
562 /**
563  * trace_filter_add_remove_task - Add or remove a task from a pid_list
564  * @pid_list: The list to modify
565  * @self: The current task for fork or NULL for exit
566  * @task: The task to add or remove
567  *
568  * If adding a task, if @self is defined, the task is only added if @self
569  * is also included in @pid_list. This happens on fork and tasks should
570  * only be added when the parent is listed. If @self is NULL, then the
571  * @task pid will be removed from the list, which would happen on exit
572  * of a task.
573  */
574 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
575 				  struct task_struct *self,
576 				  struct task_struct *task)
577 {
578 	if (!pid_list)
579 		return;
580 
581 	/* For forks, we only add if the forking task is listed */
582 	if (self) {
583 		if (!trace_find_filtered_pid(pid_list, self->pid))
584 			return;
585 	}
586 
587 	/* Sorry, but we don't support pid_max changing after setting */
588 	if (task->pid >= pid_list->pid_max)
589 		return;
590 
591 	/* "self" is set for forks, and NULL for exits */
592 	if (self)
593 		set_bit(task->pid, pid_list->pids);
594 	else
595 		clear_bit(task->pid, pid_list->pids);
596 }
597 
598 /**
599  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
600  * @pid_list: The pid list to show
601  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
602  * @pos: The position of the file
603  *
604  * This is used by the seq_file "next" operation to iterate the pids
605  * listed in a trace_pid_list structure.
606  *
607  * Returns the pid+1 as we want to display pid of zero, but NULL would
608  * stop the iteration.
609  */
610 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
611 {
612 	unsigned long pid = (unsigned long)v;
613 
614 	(*pos)++;
615 
616 	/* pid already is +1 of the actual previous bit */
617 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
618 
619 	/* Return pid + 1 to allow zero to be represented */
620 	if (pid < pid_list->pid_max)
621 		return (void *)(pid + 1);
622 
623 	return NULL;
624 }
625 
626 /**
627  * trace_pid_start - Used for seq_file to start reading pid lists
628  * @pid_list: The pid list to show
629  * @pos: The position of the file
630  *
631  * This is used by seq_file "start" operation to start the iteration
632  * of listing pids.
633  *
634  * Returns the pid+1 as we want to display pid of zero, but NULL would
635  * stop the iteration.
636  */
637 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
638 {
639 	unsigned long pid;
640 	loff_t l = 0;
641 
642 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
643 	if (pid >= pid_list->pid_max)
644 		return NULL;
645 
646 	/* Return pid + 1 so that zero can be the exit value */
647 	for (pid++; pid && l < *pos;
648 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
649 		;
650 	return (void *)pid;
651 }
652 
653 /**
654  * trace_pid_show - show the current pid in seq_file processing
655  * @m: The seq_file structure to write into
656  * @v: A void pointer of the pid (+1) value to display
657  *
658  * Can be directly used by seq_file operations to display the current
659  * pid value.
660  */
661 int trace_pid_show(struct seq_file *m, void *v)
662 {
663 	unsigned long pid = (unsigned long)v - 1;
664 
665 	seq_printf(m, "%lu\n", pid);
666 	return 0;
667 }
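/*
 * A sketch of how the three helpers above can back a seq_file interface,
 * assuming the open routine stashed the pid list in m->private (the
 * wrapper names are hypothetical; in-tree users also add RCU protection
 * around the list pointer):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(m->private, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(m->private, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */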
668 
669 /* 128 should be much more than enough */
670 #define PID_BUF_SIZE		127
671 
672 int trace_pid_write(struct trace_pid_list *filtered_pids,
673 		    struct trace_pid_list **new_pid_list,
674 		    const char __user *ubuf, size_t cnt)
675 {
676 	struct trace_pid_list *pid_list;
677 	struct trace_parser parser;
678 	unsigned long val;
679 	int nr_pids = 0;
680 	ssize_t read = 0;
681 	ssize_t ret = 0;
682 	loff_t pos;
683 	pid_t pid;
684 
685 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
686 		return -ENOMEM;
687 
688 	/*
689 	 * Always recreate a new array. The write is an all or nothing
690 	 * operation. Always create a new array when adding new pids by
691 	 * the user. If the operation fails, then the current list is
692 	 * not modified.
693 	 */
694 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
695 	if (!pid_list) {
696 		trace_parser_put(&parser);
697 		return -ENOMEM;
698 	}
699 
700 	pid_list->pid_max = READ_ONCE(pid_max);
701 
702 	/* Only truncating will shrink pid_max */
703 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
704 		pid_list->pid_max = filtered_pids->pid_max;
705 
706 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
707 	if (!pid_list->pids) {
708 		trace_parser_put(&parser);
709 		kfree(pid_list);
710 		return -ENOMEM;
711 	}
712 
713 	if (filtered_pids) {
714 		/* copy the current bits to the new max */
715 		for_each_set_bit(pid, filtered_pids->pids,
716 				 filtered_pids->pid_max) {
717 			set_bit(pid, pid_list->pids);
718 			nr_pids++;
719 		}
720 	}
721 
722 	while (cnt > 0) {
723 
724 		pos = 0;
725 
726 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
727 		if (ret < 0 || !trace_parser_loaded(&parser))
728 			break;
729 
730 		read += ret;
731 		ubuf += ret;
732 		cnt -= ret;
733 
734 		ret = -EINVAL;
735 		if (kstrtoul(parser.buffer, 0, &val))
736 			break;
737 		if (val >= pid_list->pid_max)
738 			break;
739 
740 		pid = (pid_t)val;
741 
742 		set_bit(pid, pid_list->pids);
743 		nr_pids++;
744 
745 		trace_parser_clear(&parser);
746 		ret = 0;
747 	}
748 	trace_parser_put(&parser);
749 
750 	if (ret < 0) {
751 		trace_free_pid_list(pid_list);
752 		return ret;
753 	}
754 
755 	if (!nr_pids) {
756 		/* Cleared the list of pids */
757 		trace_free_pid_list(pid_list);
758 		read = ret;
759 		pid_list = NULL;
760 	}
761 
762 	*new_pid_list = pid_list;
763 
764 	return read;
765 }
766 
767 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
768 {
769 	u64 ts;
770 
771 	/* Early boot up does not have a buffer yet */
772 	if (!buf->buffer)
773 		return trace_clock_local();
774 
775 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
776 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
777 
778 	return ts;
779 }
780 
781 u64 ftrace_now(int cpu)
782 {
783 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
784 }
785 
786 /**
787  * tracing_is_enabled - Show if global_trace has been disabled
788  *
789  * Shows if the global trace has been enabled or not. It uses the
790  * mirror flag "buffer_disabled" to be used in fast paths such as for
791  * the irqsoff tracer. But it may be inaccurate due to races. If you
792  * need to know the accurate state, use tracing_is_on() which is a little
793  * slower, but accurate.
794  */
795 int tracing_is_enabled(void)
796 {
797 	/*
798 	 * For quick access (irqsoff uses this in fast path), just
799 	 * return the mirror variable of the state of the ring buffer.
800 	 * It's a little racy, but we don't really care.
801 	 */
802 	smp_rmb();
803 	return !global_trace.buffer_disabled;
804 }
805 
806 /*
807  * trace_buf_size is the size in bytes that is allocated
808  * for a buffer. Note, the number of bytes is always rounded
809  * to page size.
810  *
811  * This number is purposely set to a low number of 16384.
812  * If the dump on oops happens, it is much appreciated not to
813  * have to wait for all that output. Anyway, this can be
814  * configured at boot time and at run time.
815  */
816 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
817 
818 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
819 
820 /* trace_types holds a link list of available tracers. */
821 static struct tracer		*trace_types __read_mostly;
822 
823 /*
824  * trace_types_lock is used to protect the trace_types list.
825  */
826 DEFINE_MUTEX(trace_types_lock);
827 
828 /*
829  * Serialize access to the ring buffer.
830  *
831  * The ring buffer serializes readers, but that is only low-level protection.
832  * The validity of the events (as returned by ring_buffer_peek() etc.)
833  * is not protected by the ring buffer.
834  *
835  * The content of events may become garbage if we allow other processes to
836  * consume these events concurrently:
837  *   A) the page of the consumed events may become a normal page
838  *      (not a reader page) in the ring buffer, and this page will be rewritten
839  *      by the event producer.
840  *   B) the page of the consumed events may become a page for splice_read,
841  *      and this page will be returned to the system.
842  *
843  * These primitives allow multiple processes to access different cpu ring
844  * buffers concurrently.
845  *
846  * These primitives don't distinguish read-only and read-consume access.
847  * Multiple read-only accesses are also serialized.
848  */
849 
850 #ifdef CONFIG_SMP
851 static DECLARE_RWSEM(all_cpu_access_lock);
852 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853 
854 static inline void trace_access_lock(int cpu)
855 {
856 	if (cpu == RING_BUFFER_ALL_CPUS) {
857 		/* gain it for accessing the whole ring buffer. */
858 		down_write(&all_cpu_access_lock);
859 	} else {
860 		/* gain it for accessing a cpu ring buffer. */
861 
862 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
863 		down_read(&all_cpu_access_lock);
864 
865 		/* Secondly block other access to this @cpu ring buffer. */
866 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
867 	}
868 }
869 
870 static inline void trace_access_unlock(int cpu)
871 {
872 	if (cpu == RING_BUFFER_ALL_CPUS) {
873 		up_write(&all_cpu_access_lock);
874 	} else {
875 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
876 		up_read(&all_cpu_access_lock);
877 	}
878 }
879 
880 static inline void trace_access_lock_init(void)
881 {
882 	int cpu;
883 
884 	for_each_possible_cpu(cpu)
885 		mutex_init(&per_cpu(cpu_access_lock, cpu));
886 }
887 
888 #else
889 
890 static DEFINE_MUTEX(access_lock);
891 
892 static inline void trace_access_lock(int cpu)
893 {
894 	(void)cpu;
895 	mutex_lock(&access_lock);
896 }
897 
898 static inline void trace_access_unlock(int cpu)
899 {
900 	(void)cpu;
901 	mutex_unlock(&access_lock);
902 }
903 
904 static inline void trace_access_lock_init(void)
905 {
906 }
907 
908 #endif
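/*
 * Typical usage of the locking primitives above, as a sketch: readers of a
 * single cpu buffer take the per-cpu lock, while operations that touch every
 * cpu buffer pass RING_BUFFER_ALL_CPUS to exclude all of them at once.
 *
 *	trace_access_lock(cpu);
 *	... consume or peek at events from that cpu's ring buffer ...
 *	trace_access_unlock(cpu);
 */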
909 
910 #ifdef CONFIG_STACKTRACE
911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
912 				 unsigned long flags,
913 				 int skip, int pc, struct pt_regs *regs);
914 static inline void ftrace_trace_stack(struct trace_array *tr,
915 				      struct trace_buffer *buffer,
916 				      unsigned long flags,
917 				      int skip, int pc, struct pt_regs *regs);
918 
919 #else
920 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
921 					unsigned long flags,
922 					int skip, int pc, struct pt_regs *regs)
923 {
924 }
925 static inline void ftrace_trace_stack(struct trace_array *tr,
926 				      struct trace_buffer *buffer,
927 				      unsigned long flags,
928 				      int skip, int pc, struct pt_regs *regs)
929 {
930 }
931 
932 #endif
933 
934 static __always_inline void
935 trace_event_setup(struct ring_buffer_event *event,
936 		  int type, unsigned long flags, int pc)
937 {
938 	struct trace_entry *ent = ring_buffer_event_data(event);
939 
940 	tracing_generic_entry_update(ent, type, flags, pc);
941 }
942 
943 static __always_inline struct ring_buffer_event *
944 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
945 			  int type,
946 			  unsigned long len,
947 			  unsigned long flags, int pc)
948 {
949 	struct ring_buffer_event *event;
950 
951 	event = ring_buffer_lock_reserve(buffer, len);
952 	if (event != NULL)
953 		trace_event_setup(event, type, flags, pc);
954 
955 	return event;
956 }
957 
958 void tracer_tracing_on(struct trace_array *tr)
959 {
960 	if (tr->array_buffer.buffer)
961 		ring_buffer_record_on(tr->array_buffer.buffer);
962 	/*
963 	 * This flag is looked at when buffers haven't been allocated
964 	 * yet, or by some tracers (like irqsoff), that just want to
965 	 * know if the ring buffer has been disabled, but it can handle
966 	 * races of where it gets disabled but we still do a record.
967 	 * As the check is in the fast path of the tracers, it is more
968 	 * important to be fast than accurate.
969 	 */
970 	tr->buffer_disabled = 0;
971 	/* Make the flag seen by readers */
972 	smp_wmb();
973 }
974 
975 /**
976  * tracing_on - enable tracing buffers
977  *
978  * This function enables tracing buffers that may have been
979  * disabled with tracing_off.
980  */
981 void tracing_on(void)
982 {
983 	tracer_tracing_on(&global_trace);
984 }
985 EXPORT_SYMBOL_GPL(tracing_on);
986 
987 
988 static __always_inline void
989 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 {
991 	__this_cpu_write(trace_taskinfo_save, true);
992 
993 	/* If this is the temp buffer, we need to commit fully */
994 	if (this_cpu_read(trace_buffered_event) == event) {
995 		/* Length is in event->array[0] */
996 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
997 		/* Release the temp buffer */
998 		this_cpu_dec(trace_buffered_event_cnt);
999 	} else
1000 		ring_buffer_unlock_commit(buffer, event);
1001 }
1002 
1003 /**
1004  * __trace_puts - write a constant string into the trace buffer.
1005  * @ip:	   The address of the caller
1006  * @str:   The constant string to write
1007  * @size:  The size of the string.
1008  */
1009 int __trace_puts(unsigned long ip, const char *str, int size)
1010 {
1011 	struct ring_buffer_event *event;
1012 	struct trace_buffer *buffer;
1013 	struct print_entry *entry;
1014 	unsigned long irq_flags;
1015 	int alloc;
1016 	int pc;
1017 
1018 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1019 		return 0;
1020 
1021 	pc = preempt_count();
1022 
1023 	if (unlikely(tracing_selftest_running || tracing_disabled))
1024 		return 0;
1025 
1026 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1027 
1028 	local_save_flags(irq_flags);
1029 	buffer = global_trace.array_buffer.buffer;
1030 	ring_buffer_nest_start(buffer);
1031 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1032 					    irq_flags, pc);
1033 	if (!event) {
1034 		size = 0;
1035 		goto out;
1036 	}
1037 
1038 	entry = ring_buffer_event_data(event);
1039 	entry->ip = ip;
1040 
1041 	memcpy(&entry->buf, str, size);
1042 
1043 	/* Add a newline if necessary */
1044 	if (entry->buf[size - 1] != '\n') {
1045 		entry->buf[size] = '\n';
1046 		entry->buf[size + 1] = '\0';
1047 	} else
1048 		entry->buf[size] = '\0';
1049 
1050 	__buffer_unlock_commit(buffer, event);
1051 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1052  out:
1053 	ring_buffer_nest_end(buffer);
1054 	return size;
1055 }
1056 EXPORT_SYMBOL_GPL(__trace_puts);
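/*
 * Callers normally do not invoke __trace_puts() directly; the trace_puts()
 * helper macro supplies the caller address and string length, roughly:
 *
 *	trace_puts("reached the slow path\n");
 *
 * (illustrative only; see the trace_puts() definition for the exact
 * expansion, which may pick __trace_bputs() for constant strings)
 */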
1057 
1058 /**
1059  * __trace_bputs - write the pointer to a constant string into trace buffer
1060  * @ip:	   The address of the caller
1061  * @str:   The constant string to write to the buffer to
1062  */
1063 int __trace_bputs(unsigned long ip, const char *str)
1064 {
1065 	struct ring_buffer_event *event;
1066 	struct trace_buffer *buffer;
1067 	struct bputs_entry *entry;
1068 	unsigned long irq_flags;
1069 	int size = sizeof(struct bputs_entry);
1070 	int ret = 0;
1071 	int pc;
1072 
1073 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1074 		return 0;
1075 
1076 	pc = preempt_count();
1077 
1078 	if (unlikely(tracing_selftest_running || tracing_disabled))
1079 		return 0;
1080 
1081 	local_save_flags(irq_flags);
1082 	buffer = global_trace.array_buffer.buffer;
1083 
1084 	ring_buffer_nest_start(buffer);
1085 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1086 					    irq_flags, pc);
1087 	if (!event)
1088 		goto out;
1089 
1090 	entry = ring_buffer_event_data(event);
1091 	entry->ip			= ip;
1092 	entry->str			= str;
1093 
1094 	__buffer_unlock_commit(buffer, event);
1095 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1096 
1097 	ret = 1;
1098  out:
1099 	ring_buffer_nest_end(buffer);
1100 	return ret;
1101 }
1102 EXPORT_SYMBOL_GPL(__trace_bputs);
1103 
1104 #ifdef CONFIG_TRACER_SNAPSHOT
1105 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1106 					   void *cond_data)
1107 {
1108 	struct tracer *tracer = tr->current_trace;
1109 	unsigned long flags;
1110 
1111 	if (in_nmi()) {
1112 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1113 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1114 		return;
1115 	}
1116 
1117 	if (!tr->allocated_snapshot) {
1118 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1119 		internal_trace_puts("*** stopping trace here!   ***\n");
1120 		tracing_off();
1121 		return;
1122 	}
1123 
1124 	/* Note, snapshot can not be used when the tracer uses it */
1125 	if (tracer->use_max_tr) {
1126 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1127 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1128 		return;
1129 	}
1130 
1131 	local_irq_save(flags);
1132 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1133 	local_irq_restore(flags);
1134 }
1135 
1136 void tracing_snapshot_instance(struct trace_array *tr)
1137 {
1138 	tracing_snapshot_instance_cond(tr, NULL);
1139 }
1140 
1141 /**
1142  * tracing_snapshot - take a snapshot of the current buffer.
1143  *
1144  * This causes a swap between the snapshot buffer and the current live
1145  * tracing buffer. You can use this to take snapshots of the live
1146  * trace when some condition is triggered, but continue to trace.
1147  *
1148  * Note, make sure to allocate the snapshot with either
1149  * a tracing_snapshot_alloc(), or by doing it manually
1150  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1151  *
1152  * If the snapshot buffer is not allocated, it will stop tracing.
1153  * Basically making a permanent snapshot.
1154  */
1155 void tracing_snapshot(void)
1156 {
1157 	struct trace_array *tr = &global_trace;
1158 
1159 	tracing_snapshot_instance(tr);
1160 }
1161 EXPORT_SYMBOL_GPL(tracing_snapshot);
1162 
1163 /**
1164  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1165  * @tr:		The tracing instance to snapshot
1166  * @cond_data:	The data to be tested conditionally, and possibly saved
1167  *
1168  * This is the same as tracing_snapshot() except that the snapshot is
1169  * conditional - the snapshot will only happen if the
1170  * cond_snapshot.update() implementation receiving the cond_data
1171  * returns true, which means that the trace array's cond_snapshot
1172  * update() operation used the cond_data to determine whether the
1173  * snapshot should be taken, and if it was, presumably saved it along
1174  * with the snapshot.
1175  */
1176 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1177 {
1178 	tracing_snapshot_instance_cond(tr, cond_data);
1179 }
1180 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1181 
1182 /**
1183  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1184  * @tr:		The tracing instance
1185  *
1186  * When the user enables a conditional snapshot using
1187  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1188  * with the snapshot.  This accessor is used to retrieve it.
1189  *
1190  * Should not be called from cond_snapshot.update(), since it takes
1191  * the tr->max_lock lock, which the code calling
1192  * cond_snapshot.update() has already taken.
1193  *
1194  * Returns the cond_data associated with the trace array's snapshot.
1195  */
1196 void *tracing_cond_snapshot_data(struct trace_array *tr)
1197 {
1198 	void *cond_data = NULL;
1199 
1200 	local_irq_disable();
1201 	arch_spin_lock(&tr->max_lock);
1202 
1203 	if (tr->cond_snapshot)
1204 		cond_data = tr->cond_snapshot->cond_data;
1205 
1206 	arch_spin_unlock(&tr->max_lock);
1207 	local_irq_enable();
1208 
1209 	return cond_data;
1210 }
1211 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1212 
1213 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1214 					struct array_buffer *size_buf, int cpu_id);
1215 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1216 
1217 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1218 {
1219 	int ret;
1220 
1221 	if (!tr->allocated_snapshot) {
1222 
1223 		/* allocate spare buffer */
1224 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1225 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1226 		if (ret < 0)
1227 			return ret;
1228 
1229 		tr->allocated_snapshot = true;
1230 	}
1231 
1232 	return 0;
1233 }
1234 
1235 static void free_snapshot(struct trace_array *tr)
1236 {
1237 	/*
1238 	 * We don't free the ring buffer; instead, we resize it because
1239 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1240 	 * we want to preserve it.
1241 	 */
1242 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1243 	set_buffer_entries(&tr->max_buffer, 1);
1244 	tracing_reset_online_cpus(&tr->max_buffer);
1245 	tr->allocated_snapshot = false;
1246 }
1247 
1248 /**
1249  * tracing_alloc_snapshot - allocate snapshot buffer.
1250  *
1251  * This only allocates the snapshot buffer if it isn't already
1252  * allocated - it doesn't also take a snapshot.
1253  *
1254  * This is meant to be used in cases where the snapshot buffer needs
1255  * to be set up for events that can't sleep but need to be able to
1256  * trigger a snapshot.
1257  */
1258 int tracing_alloc_snapshot(void)
1259 {
1260 	struct trace_array *tr = &global_trace;
1261 	int ret;
1262 
1263 	ret = tracing_alloc_snapshot_instance(tr);
1264 	WARN_ON(ret < 0);
1265 
1266 	return ret;
1267 }
1268 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1269 
1270 /**
1271  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1272  *
1273  * This is similar to tracing_snapshot(), but it will allocate the
1274  * snapshot buffer if it isn't already allocated. Use this only
1275  * where it is safe to sleep, as the allocation may sleep.
1276  *
1277  * This causes a swap between the snapshot buffer and the current live
1278  * tracing buffer. You can use this to take snapshots of the live
1279  * trace when some condition is triggered, but continue to trace.
1280  */
1281 void tracing_snapshot_alloc(void)
1282 {
1283 	int ret;
1284 
1285 	ret = tracing_alloc_snapshot();
1286 	if (ret < 0)
1287 		return;
1288 
1289 	tracing_snapshot();
1290 }
1291 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
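/*
 * A minimal usage sketch for the entry points above: allocate the spare
 * buffer once from a context that may sleep, then take cheap snapshots
 * from the event paths later (the trigger condition is illustrative):
 *
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *	...
 *	if (interesting_condition)	// hypothetical trigger
 *		tracing_snapshot();
 */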
1292 
1293 /**
1294  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1295  * @tr:		The tracing instance
1296  * @cond_data:	User data to associate with the snapshot
1297  * @update:	Implementation of the cond_snapshot update function
1298  *
1299  * Check whether the conditional snapshot for the given instance has
1300  * already been enabled, or if the current tracer is already using a
1301  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1302  * save the cond_data and update function inside.
1303  *
1304  * Returns 0 if successful, error otherwise.
1305  */
1306 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1307 				 cond_update_fn_t update)
1308 {
1309 	struct cond_snapshot *cond_snapshot;
1310 	int ret = 0;
1311 
1312 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1313 	if (!cond_snapshot)
1314 		return -ENOMEM;
1315 
1316 	cond_snapshot->cond_data = cond_data;
1317 	cond_snapshot->update = update;
1318 
1319 	mutex_lock(&trace_types_lock);
1320 
1321 	ret = tracing_alloc_snapshot_instance(tr);
1322 	if (ret)
1323 		goto fail_unlock;
1324 
1325 	if (tr->current_trace->use_max_tr) {
1326 		ret = -EBUSY;
1327 		goto fail_unlock;
1328 	}
1329 
1330 	/*
1331 	 * The cond_snapshot can only change to NULL without the
1332 	 * trace_types_lock. We don't care if we race with it going
1333 	 * to NULL, but we want to make sure that it's not set to
1334 	 * something other than NULL when we get here, which we can
1335 	 * do safely with only holding the trace_types_lock and not
1336 	 * having to take the max_lock.
1337 	 */
1338 	if (tr->cond_snapshot) {
1339 		ret = -EBUSY;
1340 		goto fail_unlock;
1341 	}
1342 
1343 	local_irq_disable();
1344 	arch_spin_lock(&tr->max_lock);
1345 	tr->cond_snapshot = cond_snapshot;
1346 	arch_spin_unlock(&tr->max_lock);
1347 	local_irq_enable();
1348 
1349 	mutex_unlock(&trace_types_lock);
1350 
1351 	return ret;
1352 
1353  fail_unlock:
1354 	mutex_unlock(&trace_types_lock);
1355 	kfree(cond_snapshot);
1356 	return ret;
1357 }
1358 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
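/*
 * A sketch of a conditional snapshot user, assuming the cond_update_fn_t
 * prototype of bool (*)(struct trace_array *tr, void *cond_data); the
 * threshold logic below is purely illustrative:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		unsigned long *limit = cond_data;
 *
 *		return my_counter >= *limit;	// snapshot only past the limit
 *	}
 *
 *	err = tracing_snapshot_cond_enable(tr, &my_limit, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_limit);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */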
1359 
1360 /**
1361  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1362  * @tr:		The tracing instance
1363  *
1364  * Check whether the conditional snapshot for the given instance is
1365  * enabled; if so, free the cond_snapshot associated with it,
1366  * otherwise return -EINVAL.
1367  *
1368  * Returns 0 if successful, error otherwise.
1369  */
1370 int tracing_snapshot_cond_disable(struct trace_array *tr)
1371 {
1372 	int ret = 0;
1373 
1374 	local_irq_disable();
1375 	arch_spin_lock(&tr->max_lock);
1376 
1377 	if (!tr->cond_snapshot)
1378 		ret = -EINVAL;
1379 	else {
1380 		kfree(tr->cond_snapshot);
1381 		tr->cond_snapshot = NULL;
1382 	}
1383 
1384 	arch_spin_unlock(&tr->max_lock);
1385 	local_irq_enable();
1386 
1387 	return ret;
1388 }
1389 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1390 #else
1391 void tracing_snapshot(void)
1392 {
1393 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1394 }
1395 EXPORT_SYMBOL_GPL(tracing_snapshot);
1396 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1397 {
1398 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1399 }
1400 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1401 int tracing_alloc_snapshot(void)
1402 {
1403 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1404 	return -ENODEV;
1405 }
1406 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1407 void tracing_snapshot_alloc(void)
1408 {
1409 	/* Give warning */
1410 	tracing_snapshot();
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1413 void *tracing_cond_snapshot_data(struct trace_array *tr)
1414 {
1415 	return NULL;
1416 }
1417 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1418 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1419 {
1420 	return -ENODEV;
1421 }
1422 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1423 int tracing_snapshot_cond_disable(struct trace_array *tr)
1424 {
1425 	return false;
1426 }
1427 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1428 #endif /* CONFIG_TRACER_SNAPSHOT */
1429 
1430 void tracer_tracing_off(struct trace_array *tr)
1431 {
1432 	if (tr->array_buffer.buffer)
1433 		ring_buffer_record_off(tr->array_buffer.buffer);
1434 	/*
1435 	 * This flag is looked at when buffers haven't been allocated
1436 	 * yet, or by some tracers (like irqsoff), that just want to
1437 	 * know if the ring buffer has been disabled, but it can handle
1438 	 * races of where it gets disabled but we still do a record.
1439 	 * As the check is in the fast path of the tracers, it is more
1440 	 * important to be fast than accurate.
1441 	 */
1442 	tr->buffer_disabled = 1;
1443 	/* Make the flag seen by readers */
1444 	smp_wmb();
1445 }
1446 
1447 /**
1448  * tracing_off - turn off tracing buffers
1449  *
1450  * This function stops the tracing buffers from recording data.
1451  * It does not disable any overhead the tracers themselves may
1452  * be causing. This function simply causes all recording to
1453  * the ring buffers to fail.
1454  */
1455 void tracing_off(void)
1456 {
1457 	tracer_tracing_off(&global_trace);
1458 }
1459 EXPORT_SYMBOL_GPL(tracing_off);
1460 
1461 void disable_trace_on_warning(void)
1462 {
1463 	if (__disable_trace_on_warning) {
1464 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1465 			"Disabling tracing due to warning\n");
1466 		tracing_off();
1467 	}
1468 }
1469 
1470 /**
1471  * tracer_tracing_is_on - show real state of ring buffer enabled
1472  * @tr : the trace array to know if ring buffer is enabled
1473  *
1474  * Shows real state of the ring buffer if it is enabled or not.
1475  */
1476 bool tracer_tracing_is_on(struct trace_array *tr)
1477 {
1478 	if (tr->array_buffer.buffer)
1479 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1480 	return !tr->buffer_disabled;
1481 }
1482 
1483 /**
1484  * tracing_is_on - show state of ring buffers enabled
1485  */
1486 int tracing_is_on(void)
1487 {
1488 	return tracer_tracing_is_on(&global_trace);
1489 }
1490 EXPORT_SYMBOL_GPL(tracing_is_on);
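/*
 * A common pattern with the on/off helpers above is to freeze the buffers
 * as soon as a problem is detected, so the events leading up to it are
 * preserved for later reading (sketch only; the check is hypothetical):
 *
 *	if (something_went_wrong())
 *		tracing_off();
 *	...
 *	if (tracing_is_on())
 *		trace_printk("still recording\n");
 */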
1491 
1492 static int __init set_buf_size(char *str)
1493 {
1494 	unsigned long buf_size;
1495 
1496 	if (!str)
1497 		return 0;
1498 	buf_size = memparse(str, &str);
1499 	/*
1500 	 * nr_entries can not be zero and the startup
1501 	 * tests require some buffer space. Therefore
1502 	 * ensure we have at least 4096 bytes of buffer.
1503 	 */
1504 	trace_buf_size = max(4096UL, buf_size);
1505 	return 1;
1506 }
1507 __setup("trace_buf_size=", set_buf_size);
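/*
 * Because the value goes through memparse(), the usual size suffixes are
 * accepted on the command line, for example (illustrative):
 *
 *	trace_buf_size=1441792
 *	trace_buf_size=16M
 */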
1508 
1509 static int __init set_tracing_thresh(char *str)
1510 {
1511 	unsigned long threshold;
1512 	int ret;
1513 
1514 	if (!str)
1515 		return 0;
1516 	ret = kstrtoul(str, 0, &threshold);
1517 	if (ret < 0)
1518 		return 0;
1519 	tracing_thresh = threshold * 1000;
1520 	return 1;
1521 }
1522 __setup("tracing_thresh=", set_tracing_thresh);
1523 
1524 unsigned long nsecs_to_usecs(unsigned long nsecs)
1525 {
1526 	return nsecs / 1000;
1527 }
1528 
1529 /*
1530  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1531  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1532  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1533  * of strings in the order that the evals (enum) were defined.
1534  */
1535 #undef C
1536 #define C(a, b) b
1537 
1538 /* These must match the bit positions in trace_iterator_flags */
1539 static const char *trace_options[] = {
1540 	TRACE_FLAGS
1541 	NULL
1542 };
1543 
1544 static struct {
1545 	u64 (*func)(void);
1546 	const char *name;
1547 	int in_ns;		/* is this clock in nanoseconds? */
1548 } trace_clocks[] = {
1549 	{ trace_clock_local,		"local",	1 },
1550 	{ trace_clock_global,		"global",	1 },
1551 	{ trace_clock_counter,		"counter",	0 },
1552 	{ trace_clock_jiffies,		"uptime",	0 },
1553 	{ trace_clock,			"perf",		1 },
1554 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1555 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1556 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1557 	ARCH_TRACE_CLOCKS
1558 };
1559 
1560 bool trace_clock_in_ns(struct trace_array *tr)
1561 {
1562 	if (trace_clocks[tr->clock_id].in_ns)
1563 		return true;
1564 
1565 	return false;
1566 }
1567 
1568 /*
1569  * trace_parser_get_init - gets the buffer for trace parser
1570  */
1571 int trace_parser_get_init(struct trace_parser *parser, int size)
1572 {
1573 	memset(parser, 0, sizeof(*parser));
1574 
1575 	parser->buffer = kmalloc(size, GFP_KERNEL);
1576 	if (!parser->buffer)
1577 		return 1;
1578 
1579 	parser->size = size;
1580 	return 0;
1581 }
1582 
1583 /*
1584  * trace_parser_put - frees the buffer for trace parser
1585  */
1586 void trace_parser_put(struct trace_parser *parser)
1587 {
1588 	kfree(parser->buffer);
1589 	parser->buffer = NULL;
1590 }
1591 
1592 /*
1593  * trace_get_user - reads the user input string separated by space
1594  * (matched by isspace(ch))
1595  *
1596  * For each string found the 'struct trace_parser' is updated,
1597  * and the function returns.
1598  *
1599  * Returns number of bytes read.
1600  *
1601  * See kernel/trace/trace.h for 'struct trace_parser' details.
1602  */
1603 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1604 	size_t cnt, loff_t *ppos)
1605 {
1606 	char ch;
1607 	size_t read = 0;
1608 	ssize_t ret;
1609 
1610 	if (!*ppos)
1611 		trace_parser_clear(parser);
1612 
1613 	ret = get_user(ch, ubuf++);
1614 	if (ret)
1615 		goto out;
1616 
1617 	read++;
1618 	cnt--;
1619 
1620 	/*
1621 	 * The parser is not finished with the last write,
1622 	 * continue reading the user input without skipping spaces.
1623 	 */
1624 	if (!parser->cont) {
1625 		/* skip white space */
1626 		while (cnt && isspace(ch)) {
1627 			ret = get_user(ch, ubuf++);
1628 			if (ret)
1629 				goto out;
1630 			read++;
1631 			cnt--;
1632 		}
1633 
1634 		parser->idx = 0;
1635 
1636 		/* only spaces were written */
1637 		if (isspace(ch) || !ch) {
1638 			*ppos += read;
1639 			ret = read;
1640 			goto out;
1641 		}
1642 	}
1643 
1644 	/* read the non-space input */
1645 	while (cnt && !isspace(ch) && ch) {
1646 		if (parser->idx < parser->size - 1)
1647 			parser->buffer[parser->idx++] = ch;
1648 		else {
1649 			ret = -EINVAL;
1650 			goto out;
1651 		}
1652 		ret = get_user(ch, ubuf++);
1653 		if (ret)
1654 			goto out;
1655 		read++;
1656 		cnt--;
1657 	}
1658 
1659 	/* We either got finished input or we have to wait for another call. */
1660 	if (isspace(ch) || !ch) {
1661 		parser->buffer[parser->idx] = 0;
1662 		parser->cont = false;
1663 	} else if (parser->idx < parser->size - 1) {
1664 		parser->cont = true;
1665 		parser->buffer[parser->idx++] = ch;
1666 		/* Make sure the parsed string always terminates with '\0'. */
1667 		parser->buffer[parser->idx] = 0;
1668 	} else {
1669 		ret = -EINVAL;
1670 		goto out;
1671 	}
1672 
1673 	*ppos += read;
1674 	ret = read;
1675 
1676 out:
1677 	return ret;
1678 }
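/*
 * A condensed sketch of the parser loop used by writers of token lists
 * (trace_pid_write() above is the full-featured version; the buffer size
 * and the consume_token() helper are illustrative only):
 *
 *	struct trace_parser parser;
 *	unsigned long val;
 *
 *	if (trace_parser_get_init(&parser, 32))
 *		return -ENOMEM;
 *
 *	while (cnt > 0) {
 *		loff_t pos = 0;
 *		ssize_t ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *
 *		if (ret <= 0 || !trace_parser_loaded(&parser))
 *			break;
 *		ubuf += ret;
 *		cnt -= ret;
 *		if (!kstrtoul(parser.buffer, 0, &val))
 *			consume_token(val);	// hypothetical consumer
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */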
1679 
1680 /* TODO add a seq_buf_to_buffer() */
1681 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1682 {
1683 	int len;
1684 
1685 	if (trace_seq_used(s) <= s->seq.readpos)
1686 		return -EBUSY;
1687 
1688 	len = trace_seq_used(s) - s->seq.readpos;
1689 	if (cnt > len)
1690 		cnt = len;
1691 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1692 
1693 	s->seq.readpos += cnt;
1694 	return cnt;
1695 }
1696 
1697 unsigned long __read_mostly	tracing_thresh;
1698 static const struct file_operations tracing_max_lat_fops;
1699 
1700 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1701 	defined(CONFIG_FSNOTIFY)
1702 
1703 static struct workqueue_struct *fsnotify_wq;
1704 
1705 static void latency_fsnotify_workfn(struct work_struct *work)
1706 {
1707 	struct trace_array *tr = container_of(work, struct trace_array,
1708 					      fsnotify_work);
1709 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1710 }
1711 
1712 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1713 {
1714 	struct trace_array *tr = container_of(iwork, struct trace_array,
1715 					      fsnotify_irqwork);
1716 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1717 }
1718 
1719 static void trace_create_maxlat_file(struct trace_array *tr,
1720 				     struct dentry *d_tracer)
1721 {
1722 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1723 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1724 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1725 					      d_tracer, &tr->max_latency,
1726 					      &tracing_max_lat_fops);
1727 }
1728 
1729 __init static int latency_fsnotify_init(void)
1730 {
1731 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1732 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1733 	if (!fsnotify_wq) {
1734 		pr_err("Unable to allocate tr_max_lat_wq\n");
1735 		return -ENOMEM;
1736 	}
1737 	return 0;
1738 }
1739 
1740 late_initcall_sync(latency_fsnotify_init);
1741 
1742 void latency_fsnotify(struct trace_array *tr)
1743 {
1744 	if (!fsnotify_wq)
1745 		return;
1746 	/*
1747 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1748 	 * possible that we are called from __schedule() or do_idle(), which
1749 	 * could cause a deadlock.
1750 	 */
1751 	irq_work_queue(&tr->fsnotify_irqwork);
1752 }
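
/*
 * Illustrative summary of the deferral chain set up above: a latency update
 * from a context where taking locks or sleeping is unsafe only queues an
 * irq_work; the irq_work handler queues real work on fsnotify_wq, which
 * finally calls fsnotify_inode() from process context:
 *
 *	latency_fsnotify(tr)
 *	  -> irq_work_queue(&tr->fsnotify_irqwork)
 *	       -> latency_fsnotify_workfn_irq()
 *	            -> queue_work(fsnotify_wq, &tr->fsnotify_work)
 *	                 -> latency_fsnotify_workfn()
 *	                      -> fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY)
 */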
1753 
1754 /*
1755  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1756  *  defined(CONFIG_FSNOTIFY)
1757  */
1758 #else
1759 
1760 #define trace_create_maxlat_file(tr, d_tracer)				\
1761 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1762 			  &tr->max_latency, &tracing_max_lat_fops)
1763 
1764 #endif
1765 
1766 #ifdef CONFIG_TRACER_MAX_TRACE
1767 /*
1768  * Copy the new maximum trace into the separate maximum-trace
1769  * structure. (this way the maximum trace is permanently saved,
1770  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1771  */
1772 static void
1773 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1774 {
1775 	struct array_buffer *trace_buf = &tr->array_buffer;
1776 	struct array_buffer *max_buf = &tr->max_buffer;
1777 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1778 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1779 
1780 	max_buf->cpu = cpu;
1781 	max_buf->time_start = data->preempt_timestamp;
1782 
1783 	max_data->saved_latency = tr->max_latency;
1784 	max_data->critical_start = data->critical_start;
1785 	max_data->critical_end = data->critical_end;
1786 
1787 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1788 	max_data->pid = tsk->pid;
1789 	/*
1790 	 * If tsk == current, then use current_uid(), as that does not use
1791 	 * RCU. The irq tracer can be called out of RCU scope.
1792 	 */
1793 	if (tsk == current)
1794 		max_data->uid = current_uid();
1795 	else
1796 		max_data->uid = task_uid(tsk);
1797 
1798 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1799 	max_data->policy = tsk->policy;
1800 	max_data->rt_priority = tsk->rt_priority;
1801 
1802 	/* record this task's comm */
1803 	tracing_record_cmdline(tsk);
1804 	latency_fsnotify(tr);
1805 }
1806 
1807 /**
1808  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1809  * @tr: tracer
1810  * @tsk: the task with the latency
1811  * @cpu: The cpu that initiated the trace.
1812  * @cond_data: User data associated with a conditional snapshot
1813  *
1814  * Flip the buffers between the @tr and the max_tr and record information
1815  * about which task was the cause of this latency.
1816  */
1817 void
1818 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1819 	      void *cond_data)
1820 {
1821 	if (tr->stop_count)
1822 		return;
1823 
1824 	WARN_ON_ONCE(!irqs_disabled());
1825 
1826 	if (!tr->allocated_snapshot) {
1827 		/* Only the nop tracer should hit this when disabling */
1828 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1829 		return;
1830 	}
1831 
1832 	arch_spin_lock(&tr->max_lock);
1833 
1834 	/* Inherit the recordable setting from array_buffer */
1835 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1836 		ring_buffer_record_on(tr->max_buffer.buffer);
1837 	else
1838 		ring_buffer_record_off(tr->max_buffer.buffer);
1839 
1840 #ifdef CONFIG_TRACER_SNAPSHOT
1841 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1842 		goto out_unlock;
1843 #endif
1844 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1845 
1846 	__update_max_tr(tr, tsk, cpu);
1847 
1848  out_unlock:
1849 	arch_spin_unlock(&tr->max_lock);
1850 }
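
/*
 * Caller-side sketch (illustrative; the surrounding logic is assumed, not
 * taken from this file): a latency tracer that detects a new worst-case
 * latency typically updates tr->max_latency first and then snapshots the
 * buffers, roughly:
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 */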
1851 
1852 /**
1853  * update_max_tr_single - only copy one trace over, and reset the rest
1854  * @tr: tracer
1855  * @tsk: task with the latency
1856  * @cpu: the cpu of the buffer to copy.
1857  *
1858  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1859  */
1860 void
1861 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1862 {
1863 	int ret;
1864 
1865 	if (tr->stop_count)
1866 		return;
1867 
1868 	WARN_ON_ONCE(!irqs_disabled());
1869 	if (!tr->allocated_snapshot) {
1870 		/* Only the nop tracer should hit this when disabling */
1871 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1872 		return;
1873 	}
1874 
1875 	arch_spin_lock(&tr->max_lock);
1876 
1877 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1878 
1879 	if (ret == -EBUSY) {
1880 		/*
1881 		 * We failed to swap the buffer due to a commit taking
1882 		 * place on this CPU. We fail to record, but we reset
1883 		 * the max trace buffer (no one writes directly to it)
1884 		 * and flag that it failed.
1885 		 * Another reason this can happen is that a resize is in progress.
1886 		 */
1887 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1888 			"Failed to swap buffers due to commit or resize in progress\n");
1889 	}
1890 
1891 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1892 
1893 	__update_max_tr(tr, tsk, cpu);
1894 	arch_spin_unlock(&tr->max_lock);
1895 
1896 	/* Any waiters on the old snapshot buffer need to wake up */
1897 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1898 }
1899 #endif /* CONFIG_TRACER_MAX_TRACE */
1900 
1901 static int wait_on_pipe(struct trace_iterator *iter, int full)
1902 {
1903 	int ret;
1904 
1905 	/* Iterators are static, they should be filled or empty */
1906 	if (trace_buffer_iter(iter, iter->cpu_file))
1907 		return 0;
1908 
1909 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1910 
1911 #ifdef CONFIG_TRACER_MAX_TRACE
1912 	/*
1913 	 * Make sure this is still the snapshot buffer, as if a snapshot were
1914 	 * to happen, this would now be the main buffer.
1915 	 */
1916 	if (iter->snapshot)
1917 		iter->array_buffer = &iter->tr->max_buffer;
1918 #endif
1919 	return ret;
1920 }
1921 
1922 #ifdef CONFIG_FTRACE_STARTUP_TEST
1923 static bool selftests_can_run;
1924 
1925 struct trace_selftests {
1926 	struct list_head		list;
1927 	struct tracer			*type;
1928 };
1929 
1930 static LIST_HEAD(postponed_selftests);
1931 
1932 static int save_selftest(struct tracer *type)
1933 {
1934 	struct trace_selftests *selftest;
1935 
1936 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1937 	if (!selftest)
1938 		return -ENOMEM;
1939 
1940 	selftest->type = type;
1941 	list_add(&selftest->list, &postponed_selftests);
1942 	return 0;
1943 }
1944 
1945 static int run_tracer_selftest(struct tracer *type)
1946 {
1947 	struct trace_array *tr = &global_trace;
1948 	struct tracer *saved_tracer = tr->current_trace;
1949 	int ret;
1950 
1951 	if (!type->selftest || tracing_selftest_disabled)
1952 		return 0;
1953 
1954 	/*
1955 	 * If a tracer registers early in boot up (before scheduling is
1956 	 * initialized and such), then do not run its selftests yet.
1957 	 * Instead, run it a little later in the boot process.
1958 	 */
1959 	if (!selftests_can_run)
1960 		return save_selftest(type);
1961 
1962 	/*
1963 	 * Run a selftest on this tracer.
1964 	 * Here we reset the trace buffer, and set the current
1965 	 * tracer to be this tracer. The tracer can then run some
1966 	 * internal tracing to verify that everything is in order.
1967 	 * If we fail, we do not register this tracer.
1968 	 */
1969 	tracing_reset_online_cpus(&tr->array_buffer);
1970 
1971 	tr->current_trace = type;
1972 
1973 #ifdef CONFIG_TRACER_MAX_TRACE
1974 	if (type->use_max_tr) {
1975 		/* If we expanded the buffers, make sure the max is expanded too */
1976 		if (ring_buffer_expanded)
1977 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1978 					   RING_BUFFER_ALL_CPUS);
1979 		tr->allocated_snapshot = true;
1980 	}
1981 #endif
1982 
1983 	/* the test is responsible for initializing and enabling */
1984 	pr_info("Testing tracer %s: ", type->name);
1985 	ret = type->selftest(type, tr);
1986 	/* the test is responsible for resetting too */
1987 	tr->current_trace = saved_tracer;
1988 	if (ret) {
1989 		printk(KERN_CONT "FAILED!\n");
1990 		/* Add the warning after printing 'FAILED' */
1991 		WARN_ON(1);
1992 		return -1;
1993 	}
1994 	/* Only reset on passing, to avoid touching corrupted buffers */
1995 	tracing_reset_online_cpus(&tr->array_buffer);
1996 
1997 #ifdef CONFIG_TRACER_MAX_TRACE
1998 	if (type->use_max_tr) {
1999 		tr->allocated_snapshot = false;
2000 
2001 		/* Shrink the max buffer again */
2002 		if (ring_buffer_expanded)
2003 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2004 					   RING_BUFFER_ALL_CPUS);
2005 	}
2006 #endif
2007 
2008 	printk(KERN_CONT "PASSED\n");
2009 	return 0;
2010 }
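
/*
 * Sketch of the ->selftest hook driven above (the function body is an
 * illustrative assumption, not an existing selftest): the callback is
 * expected to enable the tracer on @tr, generate and verify some entries,
 * and reset the tracer before returning 0 on success:
 *
 *	static int trace_selftest_startup_example(struct tracer *trace,
 *						  struct trace_array *tr)
 *	{
 *		int ret = trace->init(tr);
 *
 *		if (ret)
 *			return ret;
 *		(exercise the tracer and verify the recorded entries here)
 *		trace->reset(tr);
 *		return 0;
 *	}
 */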
2011 
2012 static __init int init_trace_selftests(void)
2013 {
2014 	struct trace_selftests *p, *n;
2015 	struct tracer *t, **last;
2016 	int ret;
2017 
2018 	selftests_can_run = true;
2019 
2020 	mutex_lock(&trace_types_lock);
2021 
2022 	if (list_empty(&postponed_selftests))
2023 		goto out;
2024 
2025 	pr_info("Running postponed tracer tests:\n");
2026 
2027 	tracing_selftest_running = true;
2028 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2029 		/* This loop can take minutes when sanitizers are enabled, so
2030 		 * let's make sure we allow RCU processing.
2031 		 */
2032 		cond_resched();
2033 		ret = run_tracer_selftest(p->type);
2034 		/* If the test fails, then warn and remove from available_tracers */
2035 		if (ret < 0) {
2036 			WARN(1, "tracer: %s failed selftest, disabling\n",
2037 			     p->type->name);
2038 			last = &trace_types;
2039 			for (t = trace_types; t; t = t->next) {
2040 				if (t == p->type) {
2041 					*last = t->next;
2042 					break;
2043 				}
2044 				last = &t->next;
2045 			}
2046 		}
2047 		list_del(&p->list);
2048 		kfree(p);
2049 	}
2050 	tracing_selftest_running = false;
2051 
2052  out:
2053 	mutex_unlock(&trace_types_lock);
2054 
2055 	return 0;
2056 }
2057 core_initcall(init_trace_selftests);
2058 #else
2059 static inline int run_tracer_selftest(struct tracer *type)
2060 {
2061 	return 0;
2062 }
2063 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2064 
2065 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2066 
2067 static void __init apply_trace_boot_options(void);
2068 
2069 /**
2070  * register_tracer - register a tracer with the ftrace system.
2071  * @type: the plugin for the tracer
2072  *
2073  * Register a new plugin tracer.
2074  */
2075 int __init register_tracer(struct tracer *type)
2076 {
2077 	struct tracer *t;
2078 	int ret = 0;
2079 
2080 	if (!type->name) {
2081 		pr_info("Tracer must have a name\n");
2082 		return -1;
2083 	}
2084 
2085 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2086 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2087 		return -1;
2088 	}
2089 
2090 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2091 		pr_warn("Can not register tracer %s due to lockdown\n",
2092 			   type->name);
2093 		return -EPERM;
2094 	}
2095 
2096 	mutex_lock(&trace_types_lock);
2097 
2098 	tracing_selftest_running = true;
2099 
2100 	for (t = trace_types; t; t = t->next) {
2101 		if (strcmp(type->name, t->name) == 0) {
2102 			/* already found */
2103 			pr_info("Tracer %s already registered\n",
2104 				type->name);
2105 			ret = -1;
2106 			goto out;
2107 		}
2108 	}
2109 
2110 	if (!type->set_flag)
2111 		type->set_flag = &dummy_set_flag;
2112 	if (!type->flags) {
2113 		/* allocate a dummy tracer_flags */
2114 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2115 		if (!type->flags) {
2116 			ret = -ENOMEM;
2117 			goto out;
2118 		}
2119 		type->flags->val = 0;
2120 		type->flags->opts = dummy_tracer_opt;
2121 	} else
2122 		if (!type->flags->opts)
2123 			type->flags->opts = dummy_tracer_opt;
2124 
2125 	/* store the tracer for __set_tracer_option */
2126 	type->flags->trace = type;
2127 
2128 	ret = run_tracer_selftest(type);
2129 	if (ret < 0)
2130 		goto out;
2131 
2132 	type->next = trace_types;
2133 	trace_types = type;
2134 	add_tracer_options(&global_trace, type);
2135 
2136  out:
2137 	tracing_selftest_running = false;
2138 	mutex_unlock(&trace_types_lock);
2139 
2140 	if (ret || !default_bootup_tracer)
2141 		goto out_unlock;
2142 
2143 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2144 		goto out_unlock;
2145 
2146 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2147 	/* Do we want this tracer to start on bootup? */
2148 	tracing_set_tracer(&global_trace, type->name);
2149 	default_bootup_tracer = NULL;
2150 
2151 	apply_trace_boot_options();
2152 
2153 	/* disable other selftests, since this will break it. */
2154 	disable_tracing_selftest("running a tracer");
2155 
2156  out_unlock:
2157 	return ret;
2158 }
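
/*
 * Registration sketch (illustrative only; no "example" tracer exists in the
 * kernel): a tracer is normally registered from an initcall with at least a
 * name plus init/reset callbacks, and register_tracer() then runs its
 * selftest and hooks it into the list of available tracers:
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */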
2159 
2160 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2161 {
2162 	struct trace_buffer *buffer = buf->buffer;
2163 
2164 	if (!buffer)
2165 		return;
2166 
2167 	ring_buffer_record_disable(buffer);
2168 
2169 	/* Make sure all commits have finished */
2170 	synchronize_rcu();
2171 	ring_buffer_reset_cpu(buffer, cpu);
2172 
2173 	ring_buffer_record_enable(buffer);
2174 }
2175 
2176 void tracing_reset_online_cpus(struct array_buffer *buf)
2177 {
2178 	struct trace_buffer *buffer = buf->buffer;
2179 
2180 	if (!buffer)
2181 		return;
2182 
2183 	ring_buffer_record_disable(buffer);
2184 
2185 	/* Make sure all commits have finished */
2186 	synchronize_rcu();
2187 
2188 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2189 
2190 	ring_buffer_reset_online_cpus(buffer);
2191 
2192 	ring_buffer_record_enable(buffer);
2193 }
2194 
2195 /* Must have trace_types_lock held */
2196 void tracing_reset_all_online_cpus_unlocked(void)
2197 {
2198 	struct trace_array *tr;
2199 
2200 	lockdep_assert_held(&trace_types_lock);
2201 
2202 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2203 		if (!tr->clear_trace)
2204 			continue;
2205 		tr->clear_trace = false;
2206 		tracing_reset_online_cpus(&tr->array_buffer);
2207 #ifdef CONFIG_TRACER_MAX_TRACE
2208 		tracing_reset_online_cpus(&tr->max_buffer);
2209 #endif
2210 	}
2211 }
2212 
2213 void tracing_reset_all_online_cpus(void)
2214 {
2215 	mutex_lock(&trace_types_lock);
2216 	tracing_reset_all_online_cpus_unlocked();
2217 	mutex_unlock(&trace_types_lock);
2218 }
2219 
2220 /*
2221  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2222  * is the tgid last observed corresponding to pid=i.
2223  */
2224 static int *tgid_map;
2225 
2226 /* The maximum valid index into tgid_map. */
2227 static size_t tgid_map_max;
2228 
2229 #define SAVED_CMDLINES_DEFAULT 128
2230 #define NO_CMDLINE_MAP UINT_MAX
2231 /*
2232  * Preemption must be disabled before acquiring trace_cmdline_lock.
2233  * The various trace_arrays' max_lock must be acquired in a context
2234  * where interrupt is disabled.
2235  */
2236 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2237 struct saved_cmdlines_buffer {
2238 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2239 	unsigned *map_cmdline_to_pid;
2240 	unsigned cmdline_num;
2241 	int cmdline_idx;
2242 	char *saved_cmdlines;
2243 };
2244 static struct saved_cmdlines_buffer *savedcmd;
2245 
2246 static inline char *get_saved_cmdlines(int idx)
2247 {
2248 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2249 }
2250 
2251 static inline void set_cmdline(int idx, const char *cmdline)
2252 {
2253 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2254 }
2255 
2256 static int allocate_cmdlines_buffer(unsigned int val,
2257 				    struct saved_cmdlines_buffer *s)
2258 {
2259 	s->map_cmdline_to_pid = kmalloc_array(val,
2260 					      sizeof(*s->map_cmdline_to_pid),
2261 					      GFP_KERNEL);
2262 	if (!s->map_cmdline_to_pid)
2263 		return -ENOMEM;
2264 
2265 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2266 	if (!s->saved_cmdlines) {
2267 		kfree(s->map_cmdline_to_pid);
2268 		return -ENOMEM;
2269 	}
2270 
2271 	s->cmdline_idx = 0;
2272 	s->cmdline_num = val;
2273 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2274 	       sizeof(s->map_pid_to_cmdline));
2275 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2276 	       val * sizeof(*s->map_cmdline_to_pid));
2277 
2278 	return 0;
2279 }
2280 
2281 static int trace_create_savedcmd(void)
2282 {
2283 	int ret;
2284 
2285 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2286 	if (!savedcmd)
2287 		return -ENOMEM;
2288 
2289 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2290 	if (ret < 0) {
2291 		kfree(savedcmd);
2292 		savedcmd = NULL;
2293 		return -ENOMEM;
2294 	}
2295 
2296 	return 0;
2297 }
2298 
2299 int is_tracing_stopped(void)
2300 {
2301 	return global_trace.stop_count;
2302 }
2303 
2304 static void tracing_start_tr(struct trace_array *tr)
2305 {
2306 	struct trace_buffer *buffer;
2307 	unsigned long flags;
2308 
2309 	if (tracing_disabled)
2310 		return;
2311 
2312 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2313 	if (--tr->stop_count) {
2314 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2315 			/* Someone screwed up their debugging */
2316 			tr->stop_count = 0;
2317 		}
2318 		goto out;
2319 	}
2320 
2321 	/* Prevent the buffers from switching */
2322 	arch_spin_lock(&tr->max_lock);
2323 
2324 	buffer = tr->array_buffer.buffer;
2325 	if (buffer)
2326 		ring_buffer_record_enable(buffer);
2327 
2328 #ifdef CONFIG_TRACER_MAX_TRACE
2329 	buffer = tr->max_buffer.buffer;
2330 	if (buffer)
2331 		ring_buffer_record_enable(buffer);
2332 #endif
2333 
2334 	arch_spin_unlock(&tr->max_lock);
2335 
2336  out:
2337 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2338 }
2339 
2340 /**
2341  * tracing_start - quick start of the tracer
2342  *
2343  * If tracing is enabled but was stopped by tracing_stop,
2344  * this will start the tracer back up.
2345  */
2346 void tracing_start(void)
2347 
2348 {
2349 	return tracing_start_tr(&global_trace);
2350 }
2351 
2352 static void tracing_stop_tr(struct trace_array *tr)
2353 {
2354 	struct trace_buffer *buffer;
2355 	unsigned long flags;
2356 
2357 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2358 	if (tr->stop_count++)
2359 		goto out;
2360 
2361 	/* Prevent the buffers from switching */
2362 	arch_spin_lock(&tr->max_lock);
2363 
2364 	buffer = tr->array_buffer.buffer;
2365 	if (buffer)
2366 		ring_buffer_record_disable(buffer);
2367 
2368 #ifdef CONFIG_TRACER_MAX_TRACE
2369 	buffer = tr->max_buffer.buffer;
2370 	if (buffer)
2371 		ring_buffer_record_disable(buffer);
2372 #endif
2373 
2374 	arch_spin_unlock(&tr->max_lock);
2375 
2376  out:
2377 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2378 }
2379 
2380 /**
2381  * tracing_stop - quick stop of the tracer
2382  *
2383  * Light weight way to stop tracing. Use in conjunction with
2384  * tracing_start.
2385  */
2386 void tracing_stop(void)
2387 {
2388 	return tracing_stop_tr(&global_trace);
2389 }
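
/*
 * Usage sketch (illustrative): tracing_stop() and tracing_start() nest via
 * tr->stop_count, so a caller that needs the buffers quiescent around a
 * critical section can simply pair them:
 *
 *	tracing_stop();
 *	(inspect or dump the buffers with no new writers)
 *	tracing_start();
 */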
2390 
2391 static int trace_save_cmdline(struct task_struct *tsk)
2392 {
2393 	unsigned tpid, idx;
2394 
2395 	/* treat recording of idle task as a success */
2396 	if (!tsk->pid)
2397 		return 1;
2398 
2399 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2400 
2401 	/*
2402 	 * It's not the end of the world if we don't get
2403 	 * the lock, but we also don't want to spin
2404 	 * nor do we want to disable interrupts,
2405 	 * so if we miss here, then better luck next time.
2406 	 *
2407 	 * This is called from within the scheduler and wakeup paths, so
2408 	 * interrupts had better be disabled and the run queue lock be held.
2409 	 */
2410 	lockdep_assert_preemption_disabled();
2411 	if (!arch_spin_trylock(&trace_cmdline_lock))
2412 		return 0;
2413 
2414 	idx = savedcmd->map_pid_to_cmdline[tpid];
2415 	if (idx == NO_CMDLINE_MAP) {
2416 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2417 
2418 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2419 		savedcmd->cmdline_idx = idx;
2420 	}
2421 
2422 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2423 	set_cmdline(idx, tsk->comm);
2424 
2425 	arch_spin_unlock(&trace_cmdline_lock);
2426 
2427 	return 1;
2428 }
2429 
2430 static void __trace_find_cmdline(int pid, char comm[])
2431 {
2432 	unsigned map;
2433 	int tpid;
2434 
2435 	if (!pid) {
2436 		strcpy(comm, "<idle>");
2437 		return;
2438 	}
2439 
2440 	if (WARN_ON_ONCE(pid < 0)) {
2441 		strcpy(comm, "<XXX>");
2442 		return;
2443 	}
2444 
2445 	tpid = pid & (PID_MAX_DEFAULT - 1);
2446 	map = savedcmd->map_pid_to_cmdline[tpid];
2447 	if (map != NO_CMDLINE_MAP) {
2448 		tpid = savedcmd->map_cmdline_to_pid[map];
2449 		if (tpid == pid) {
2450 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2451 			return;
2452 		}
2453 	}
2454 	strcpy(comm, "<...>");
2455 }
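
/*
 * Worked example of the two-level mapping used above (the numbers are
 * illustrative): recording pid 4097 with comm "bash" hashes the pid to
 * tpid = 4097 & (PID_MAX_DEFAULT - 1) and stores, for some slot idx:
 *
 *	savedcmd->map_pid_to_cmdline[tpid] = idx;
 *	savedcmd->map_cmdline_to_pid[idx]  = 4097;
 *	strncpy(get_saved_cmdlines(idx), "bash", TASK_COMM_LEN);
 *
 * On lookup, map_cmdline_to_pid[idx] is compared against the requested pid,
 * so a different task that hashes to the same tpid reports "<...>" instead
 * of a stale comm.
 */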
2456 
2457 void trace_find_cmdline(int pid, char comm[])
2458 {
2459 	preempt_disable();
2460 	arch_spin_lock(&trace_cmdline_lock);
2461 
2462 	__trace_find_cmdline(pid, comm);
2463 
2464 	arch_spin_unlock(&trace_cmdline_lock);
2465 	preempt_enable();
2466 }
2467 
2468 static int *trace_find_tgid_ptr(int pid)
2469 {
2470 	/*
2471 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2472 	 * if we observe a non-NULL tgid_map then we also observe the correct
2473 	 * tgid_map_max.
2474 	 */
2475 	int *map = smp_load_acquire(&tgid_map);
2476 
2477 	if (unlikely(!map || pid > tgid_map_max))
2478 		return NULL;
2479 
2480 	return &map[pid];
2481 }
2482 
2483 int trace_find_tgid(int pid)
2484 {
2485 	int *ptr = trace_find_tgid_ptr(pid);
2486 
2487 	return ptr ? *ptr : 0;
2488 }
2489 
2490 static int trace_save_tgid(struct task_struct *tsk)
2491 {
2492 	int *ptr;
2493 
2494 	/* treat recording of idle task as a success */
2495 	if (!tsk->pid)
2496 		return 1;
2497 
2498 	ptr = trace_find_tgid_ptr(tsk->pid);
2499 	if (!ptr)
2500 		return 0;
2501 
2502 	*ptr = tsk->tgid;
2503 	return 1;
2504 }
2505 
2506 static bool tracing_record_taskinfo_skip(int flags)
2507 {
2508 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2509 		return true;
2510 	if (!__this_cpu_read(trace_taskinfo_save))
2511 		return true;
2512 	return false;
2513 }
2514 
2515 /**
2516  * tracing_record_taskinfo - record the task info of a task
2517  *
2518  * @task:  task to record
2519  * @flags: TRACE_RECORD_CMDLINE for recording comm
2520  *         TRACE_RECORD_TGID for recording tgid
2521  */
2522 void tracing_record_taskinfo(struct task_struct *task, int flags)
2523 {
2524 	bool done;
2525 
2526 	if (tracing_record_taskinfo_skip(flags))
2527 		return;
2528 
2529 	/*
2530 	 * Record as much task information as possible. If some fail, continue
2531 	 * to try to record the others.
2532 	 */
2533 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2534 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2535 
2536 	/* If recording any information failed, retry again soon. */
2537 	if (!done)
2538 		return;
2539 
2540 	__this_cpu_write(trace_taskinfo_save, false);
2541 }
2542 
2543 /**
2544  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2545  *
2546  * @prev: previous task during sched_switch
2547  * @next: next task during sched_switch
2548  * @flags: TRACE_RECORD_CMDLINE for recording comm
2549  *         TRACE_RECORD_TGID for recording tgid
2550  */
2551 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2552 					  struct task_struct *next, int flags)
2553 {
2554 	bool done;
2555 
2556 	if (tracing_record_taskinfo_skip(flags))
2557 		return;
2558 
2559 	/*
2560 	 * Record as much task information as possible. If some fail, continue
2561 	 * to try to record the others.
2562 	 */
2563 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2564 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2565 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2566 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2567 
2568 	/* If recording any information failed, retry again soon. */
2569 	if (!done)
2570 		return;
2571 
2572 	__this_cpu_write(trace_taskinfo_save, false);
2573 }
2574 
2575 /* Helpers to record a specific task information */
2576 void tracing_record_cmdline(struct task_struct *task)
2577 {
2578 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2579 }
2580 
2581 void tracing_record_tgid(struct task_struct *task)
2582 {
2583 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2584 }
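
/*
 * Usage sketch (illustrative): a caller that wants both pieces of task info
 * can combine the flags instead of using the helpers above:
 *
 *	tracing_record_taskinfo(current, TRACE_RECORD_CMDLINE |
 *					 TRACE_RECORD_TGID);
 */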
2585 
2586 /*
2587  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2588  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2589  * simplifies those functions and keeps them in sync.
2590  */
2591 enum print_line_t trace_handle_return(struct trace_seq *s)
2592 {
2593 	return trace_seq_has_overflowed(s) ?
2594 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2595 }
2596 EXPORT_SYMBOL_GPL(trace_handle_return);
2597 
2598 void
2599 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2600 			     unsigned long flags, int pc)
2601 {
2602 	struct task_struct *tsk = current;
2603 
2604 	entry->preempt_count		= pc & 0xff;
2605 	entry->pid			= (tsk) ? tsk->pid : 0;
2606 	entry->type			= type;
2607 	entry->flags =
2608 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2609 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2610 #else
2611 		TRACE_FLAG_IRQS_NOSUPPORT |
2612 #endif
2613 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2614 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2615 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2616 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2617 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2618 }
2619 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2620 
2621 struct ring_buffer_event *
2622 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2623 			  int type,
2624 			  unsigned long len,
2625 			  unsigned long flags, int pc)
2626 {
2627 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2628 }
2629 
2630 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2631 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2632 static int trace_buffered_event_ref;
2633 
2634 /**
2635  * trace_buffered_event_enable - enable buffering events
2636  *
2637  * When events are being filtered, it is quicker to use a temporary
2638  * buffer to write the event data into if there's a likely chance
2639  * that it will not be committed. The discard of the ring buffer
2640  * is not as fast as committing, and is much slower than copying
2641  * a commit.
2642  *
2643  * When an event is to be filtered, allocate per cpu buffers to
2644  * write the event data into, and if the event is filtered and discarded
2645  * it is simply dropped, otherwise, the entire data is to be committed
2646  * in one shot.
2647  */
2648 void trace_buffered_event_enable(void)
2649 {
2650 	struct ring_buffer_event *event;
2651 	struct page *page;
2652 	int cpu;
2653 
2654 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2655 
2656 	if (trace_buffered_event_ref++)
2657 		return;
2658 
2659 	for_each_tracing_cpu(cpu) {
2660 		page = alloc_pages_node(cpu_to_node(cpu),
2661 					GFP_KERNEL | __GFP_NORETRY, 0);
2662 		/* This is just an optimization and can handle failures */
2663 		if (!page) {
2664 			pr_err("Failed to allocate event buffer\n");
2665 			break;
2666 		}
2667 
2668 		event = page_address(page);
2669 		memset(event, 0, sizeof(*event));
2670 
2671 		per_cpu(trace_buffered_event, cpu) = event;
2672 
2673 		preempt_disable();
2674 		if (cpu == smp_processor_id() &&
2675 		    __this_cpu_read(trace_buffered_event) !=
2676 		    per_cpu(trace_buffered_event, cpu))
2677 			WARN_ON_ONCE(1);
2678 		preempt_enable();
2679 	}
2680 }
2681 
2682 static void enable_trace_buffered_event(void *data)
2683 {
2684 	/* Probably not needed, but do it anyway */
2685 	smp_rmb();
2686 	this_cpu_dec(trace_buffered_event_cnt);
2687 }
2688 
2689 static void disable_trace_buffered_event(void *data)
2690 {
2691 	this_cpu_inc(trace_buffered_event_cnt);
2692 }
2693 
2694 /**
2695  * trace_buffered_event_disable - disable buffering events
2696  *
2697  * When a filter is removed, it is faster to not use the buffered
2698  * events, and to commit directly into the ring buffer. Free up
2699  * the temp buffers when there are no more users. This requires
2700  * special synchronization with current events.
2701  */
2702 void trace_buffered_event_disable(void)
2703 {
2704 	int cpu;
2705 
2706 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2707 
2708 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2709 		return;
2710 
2711 	if (--trace_buffered_event_ref)
2712 		return;
2713 
2714 	/* For each CPU, set the buffer as used. */
2715 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2716 			 NULL, true);
2717 
2718 	/* Wait for all current users to finish */
2719 	synchronize_rcu();
2720 
2721 	for_each_tracing_cpu(cpu) {
2722 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2723 		per_cpu(trace_buffered_event, cpu) = NULL;
2724 	}
2725 
2726 	/*
2727 	 * Wait for all CPUs that may have started checking whether they can use
2728 	 * their event buffer only after the previous synchronize_rcu() call and
2729 	 * that still read a valid pointer from trace_buffered_event. They must
2730 	 * not see a cleared trace_buffered_event_cnt, or they could wrongly
2731 	 * decide to use a pointed-to buffer that has now been freed.
2732 	 */
2733 	synchronize_rcu();
2734 
2735 	/* For each CPU, relinquish the buffer */
2736 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2737 			 true);
2738 }
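
/*
 * Usage sketch (illustrative): code that moves an event into a filtered or
 * soft-disabled state brackets that transition with these calls, under
 * event_mutex, so such events go through the per-CPU scratch buffer while
 * the state is active:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	(install the filter / set the soft-disable state)
 *	mutex_unlock(&event_mutex);
 *
 *	and later, when tearing it down:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */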
2739 
2740 static struct trace_buffer *temp_buffer;
2741 
2742 struct ring_buffer_event *
2743 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2744 			  struct trace_event_file *trace_file,
2745 			  int type, unsigned long len,
2746 			  unsigned long flags, int pc)
2747 {
2748 	struct ring_buffer_event *entry;
2749 	int val;
2750 
2751 	*current_rb = trace_file->tr->array_buffer.buffer;
2752 
2753 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2754 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2755 	    (entry = this_cpu_read(trace_buffered_event))) {
2756 		/* Try to use the per cpu buffer first */
2757 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2758 		if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2759 			trace_event_setup(entry, type, flags, pc);
2760 			entry->array[0] = len;
2761 			return entry;
2762 		}
2763 		this_cpu_dec(trace_buffered_event_cnt);
2764 	}
2765 
2766 	entry = __trace_buffer_lock_reserve(*current_rb,
2767 					    type, len, flags, pc);
2768 	/*
2769 	 * If tracing is off, but we have triggers enabled
2770 	 * we still need to look at the event data. Use the temp_buffer
2771 	 * to store the trace event for the trigger to use. It's recursion
2772 	 * safe and will not be recorded anywhere.
2773 	 */
2774 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2775 		*current_rb = temp_buffer;
2776 		entry = __trace_buffer_lock_reserve(*current_rb,
2777 						    type, len, flags, pc);
2778 	}
2779 	return entry;
2780 }
2781 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2782 
2783 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2784 static DEFINE_MUTEX(tracepoint_printk_mutex);
2785 
2786 static void output_printk(struct trace_event_buffer *fbuffer)
2787 {
2788 	struct trace_event_call *event_call;
2789 	struct trace_event_file *file;
2790 	struct trace_event *event;
2791 	unsigned long flags;
2792 	struct trace_iterator *iter = tracepoint_print_iter;
2793 
2794 	/* We should never get here if iter is NULL */
2795 	if (WARN_ON_ONCE(!iter))
2796 		return;
2797 
2798 	event_call = fbuffer->trace_file->event_call;
2799 	if (!event_call || !event_call->event.funcs ||
2800 	    !event_call->event.funcs->trace)
2801 		return;
2802 
2803 	file = fbuffer->trace_file;
2804 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2805 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2806 	     !filter_match_preds(file->filter, fbuffer->entry)))
2807 		return;
2808 
2809 	event = &fbuffer->trace_file->event_call->event;
2810 
2811 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2812 	trace_seq_init(&iter->seq);
2813 	iter->ent = fbuffer->entry;
2814 	event_call->event.funcs->trace(iter, 0, event);
2815 	trace_seq_putc(&iter->seq, 0);
2816 	printk("%s", iter->seq.buffer);
2817 
2818 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2819 }
2820 
2821 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2822 			     void *buffer, size_t *lenp,
2823 			     loff_t *ppos)
2824 {
2825 	int save_tracepoint_printk;
2826 	int ret;
2827 
2828 	mutex_lock(&tracepoint_printk_mutex);
2829 	save_tracepoint_printk = tracepoint_printk;
2830 
2831 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2832 
2833 	/*
2834 	 * This will force exiting early, as tracepoint_printk
2835 	 * is always zero when tracepoint_print_iter is not allocated.
2836 	 */
2837 	if (!tracepoint_print_iter)
2838 		tracepoint_printk = 0;
2839 
2840 	if (save_tracepoint_printk == tracepoint_printk)
2841 		goto out;
2842 
2843 	if (tracepoint_printk)
2844 		static_key_enable(&tracepoint_printk_key.key);
2845 	else
2846 		static_key_disable(&tracepoint_printk_key.key);
2847 
2848  out:
2849 	mutex_unlock(&tracepoint_printk_mutex);
2850 
2851 	return ret;
2852 }
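
/*
 * Usage sketch (assumes the usual sysctl mount point and the "tp_printk"
 * boot option, which allocates tracepoint_print_iter): the behaviour can
 * then be toggled at run time through the sysctl handled above:
 *
 *	echo 1 > /proc/sys/kernel/tracepoint_printk
 *	echo 0 > /proc/sys/kernel/tracepoint_printk
 */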
2853 
2854 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2855 {
2856 	if (static_key_false(&tracepoint_printk_key.key))
2857 		output_printk(fbuffer);
2858 
2859 	if (static_branch_unlikely(&trace_event_exports_enabled))
2860 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2861 	event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2862 				    fbuffer->event, fbuffer->entry,
2863 				    fbuffer->flags, fbuffer->pc, fbuffer->regs);
2864 }
2865 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2866 
2867 /*
2868  * Skip 3:
2869  *
2870  *   trace_buffer_unlock_commit_regs()
2871  *   trace_event_buffer_commit()
2872  *   trace_event_raw_event_xxx()
2873  */
2874 # define STACK_SKIP 3
2875 
2876 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2877 				     struct trace_buffer *buffer,
2878 				     struct ring_buffer_event *event,
2879 				     unsigned long flags, int pc,
2880 				     struct pt_regs *regs)
2881 {
2882 	__buffer_unlock_commit(buffer, event);
2883 
2884 	/*
2885 	 * If regs is not set, then skip the necessary functions.
2886 	 * Note, we can still get here via blktrace, wakeup tracer
2887 	 * and mmiotrace, but that's ok if they lose a function or
2888 	 * two. They are not that meaningful.
2889 	 */
2890 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2891 	ftrace_trace_userstack(tr, buffer, flags, pc);
2892 }
2893 
2894 /*
2895  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2896  */
2897 void
2898 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2899 				   struct ring_buffer_event *event)
2900 {
2901 	__buffer_unlock_commit(buffer, event);
2902 }
2903 
2904 void
2905 trace_function(struct trace_array *tr,
2906 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2907 	       int pc)
2908 {
2909 	struct trace_event_call *call = &event_function;
2910 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2911 	struct ring_buffer_event *event;
2912 	struct ftrace_entry *entry;
2913 
2914 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2915 					    flags, pc);
2916 	if (!event)
2917 		return;
2918 	entry	= ring_buffer_event_data(event);
2919 	entry->ip			= ip;
2920 	entry->parent_ip		= parent_ip;
2921 
2922 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2923 		if (static_branch_unlikely(&trace_function_exports_enabled))
2924 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2925 		__buffer_unlock_commit(buffer, event);
2926 	}
2927 }
2928 
2929 #ifdef CONFIG_STACKTRACE
2930 
2931 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2932 #define FTRACE_KSTACK_NESTING	4
2933 
2934 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2935 
2936 struct ftrace_stack {
2937 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2938 };
2939 
2940 
2941 struct ftrace_stacks {
2942 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2943 };
2944 
2945 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2946 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2947 
2948 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2949 				 unsigned long flags,
2950 				 int skip, int pc, struct pt_regs *regs)
2951 {
2952 	struct trace_event_call *call = &event_kernel_stack;
2953 	struct ring_buffer_event *event;
2954 	unsigned int size, nr_entries;
2955 	struct ftrace_stack *fstack;
2956 	struct stack_entry *entry;
2957 	int stackidx;
2958 
2959 	/*
2960 	 * Add one, for this function and the call to save_stack_trace()
2961 	 * If regs is set, then these functions will not be in the way.
2962 	 */
2963 #ifndef CONFIG_UNWINDER_ORC
2964 	if (!regs)
2965 		skip++;
2966 #endif
2967 
2968 	preempt_disable_notrace();
2969 
2970 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2971 
2972 	/* This should never happen. If it does, yell once and skip */
2973 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2974 		goto out;
2975 
2976 	/*
2977 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2978 	 * interrupt will either see the value pre increment or post
2979 	 * increment. If the interrupt happens pre increment it will have
2980 	 * restored the counter when it returns.  We just need a barrier to
2981 	 * keep gcc from moving things around.
2982 	 */
2983 	barrier();
2984 
2985 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2986 	size = ARRAY_SIZE(fstack->calls);
2987 
2988 	if (regs) {
2989 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2990 						   size, skip);
2991 	} else {
2992 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2993 	}
2994 
2995 	size = nr_entries * sizeof(unsigned long);
2996 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2997 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
2998 				    flags, pc);
2999 	if (!event)
3000 		goto out;
3001 	entry = ring_buffer_event_data(event);
3002 
3003 	memcpy(&entry->caller, fstack->calls, size);
3004 	entry->size = nr_entries;
3005 
3006 	if (!call_filter_check_discard(call, entry, buffer, event))
3007 		__buffer_unlock_commit(buffer, event);
3008 
3009  out:
3010 	/* Again, don't let gcc optimize things here */
3011 	barrier();
3012 	__this_cpu_dec(ftrace_stack_reserve);
3013 	preempt_enable_notrace();
3014 
3015 }
3016 
3017 static inline void ftrace_trace_stack(struct trace_array *tr,
3018 				      struct trace_buffer *buffer,
3019 				      unsigned long flags,
3020 				      int skip, int pc, struct pt_regs *regs)
3021 {
3022 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3023 		return;
3024 
3025 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
3026 }
3027 
3028 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3029 		   int pc)
3030 {
3031 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3032 
3033 	if (rcu_is_watching()) {
3034 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3035 		return;
3036 	}
3037 
3038 	/*
3039 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3040 	 * but if the above rcu_is_watching() failed, then the NMI
3041 	 * triggered someplace critical, and rcu_irq_enter() should
3042 	 * not be called from NMI.
3043 	 */
3044 	if (unlikely(in_nmi()))
3045 		return;
3046 
3047 	rcu_irq_enter_irqson();
3048 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3049 	rcu_irq_exit_irqson();
3050 }
3051 
3052 /**
3053  * trace_dump_stack - record a stack back trace in the trace buffer
3054  * @skip: Number of functions to skip (helper handlers)
3055  */
3056 void trace_dump_stack(int skip)
3057 {
3058 	unsigned long flags;
3059 
3060 	if (tracing_disabled || tracing_selftest_running)
3061 		return;
3062 
3063 	local_save_flags(flags);
3064 
3065 #ifndef CONFIG_UNWINDER_ORC
3066 	/* Skip 1 to skip this function. */
3067 	skip++;
3068 #endif
3069 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3070 			     flags, skip, preempt_count(), NULL);
3071 }
3072 EXPORT_SYMBOL_GPL(trace_dump_stack);
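
/*
 * Usage sketch (illustrative): a debugging site that wants its own call
 * chain recorded in the trace buffer, with no helper frames skipped, can
 * simply do:
 *
 *	trace_dump_stack(0);
 */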
3073 
3074 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3075 static DEFINE_PER_CPU(int, user_stack_count);
3076 
3077 static void
3078 ftrace_trace_userstack(struct trace_array *tr,
3079 		       struct trace_buffer *buffer, unsigned long flags, int pc)
3080 {
3081 	struct trace_event_call *call = &event_user_stack;
3082 	struct ring_buffer_event *event;
3083 	struct userstack_entry *entry;
3084 
3085 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3086 		return;
3087 
3088 	/*
3089 	 * NMIs can not handle page faults, even with fixups.
3090 	 * Saving the user stack can (and often does) fault.
3091 	 */
3092 	if (unlikely(in_nmi()))
3093 		return;
3094 
3095 	/*
3096 	 * prevent recursion, since the user stack tracing may
3097 	 * trigger other kernel events.
3098 	 */
3099 	preempt_disable();
3100 	if (__this_cpu_read(user_stack_count))
3101 		goto out;
3102 
3103 	__this_cpu_inc(user_stack_count);
3104 
3105 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3106 					    sizeof(*entry), flags, pc);
3107 	if (!event)
3108 		goto out_drop_count;
3109 	entry	= ring_buffer_event_data(event);
3110 
3111 	entry->tgid		= current->tgid;
3112 	memset(&entry->caller, 0, sizeof(entry->caller));
3113 
3114 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3115 	if (!call_filter_check_discard(call, entry, buffer, event))
3116 		__buffer_unlock_commit(buffer, event);
3117 
3118  out_drop_count:
3119 	__this_cpu_dec(user_stack_count);
3120  out:
3121 	preempt_enable();
3122 }
3123 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3124 static void ftrace_trace_userstack(struct trace_array *tr,
3125 				   struct trace_buffer *buffer,
3126 				   unsigned long flags, int pc)
3127 {
3128 }
3129 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3130 
3131 #endif /* CONFIG_STACKTRACE */
3132 
3133 /* created for use with alloc_percpu */
3134 struct trace_buffer_struct {
3135 	int nesting;
3136 	char buffer[4][TRACE_BUF_SIZE];
3137 };
3138 
3139 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3140 
3141 /*
3142  * This allows for lockless recording.  If we're nested too deeply, then
3143  * this returns NULL.
3144  */
3145 static char *get_trace_buf(void)
3146 {
3147 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3148 
3149 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3150 		return NULL;
3151 
3152 	buffer->nesting++;
3153 
3154 	/* Interrupts must see nesting incremented before we use the buffer */
3155 	barrier();
3156 	return &buffer->buffer[buffer->nesting - 1][0];
3157 }
3158 
3159 static void put_trace_buf(void)
3160 {
3161 	/* Don't let the decrement of nesting leak before this */
3162 	barrier();
3163 	this_cpu_dec(trace_percpu_buffer->nesting);
3164 }
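
/*
 * Usage sketch (illustrative): every successful get_trace_buf() must be
 * paired with a put_trace_buf() on the same CPU, with preemption disabled
 * across the pair (see trace_vbprintk() below for the real pattern):
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		(format up to TRACE_BUF_SIZE bytes into buf)
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */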
3165 
3166 static int alloc_percpu_trace_buffer(void)
3167 {
3168 	struct trace_buffer_struct __percpu *buffers;
3169 
3170 	if (trace_percpu_buffer)
3171 		return 0;
3172 
3173 	buffers = alloc_percpu(struct trace_buffer_struct);
3174 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3175 		return -ENOMEM;
3176 
3177 	trace_percpu_buffer = buffers;
3178 	return 0;
3179 }
3180 
3181 static int buffers_allocated;
3182 
3183 void trace_printk_init_buffers(void)
3184 {
3185 	if (buffers_allocated)
3186 		return;
3187 
3188 	if (alloc_percpu_trace_buffer())
3189 		return;
3190 
3191 	/* trace_printk() is for debug use only. Don't use it in production. */
3192 
3193 	pr_warn("\n");
3194 	pr_warn("**********************************************************\n");
3195 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3196 	pr_warn("**                                                      **\n");
3197 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3198 	pr_warn("**                                                      **\n");
3199 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3200 	pr_warn("** unsafe for production use.                           **\n");
3201 	pr_warn("**                                                      **\n");
3202 	pr_warn("** If you see this message and you are not debugging    **\n");
3203 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3204 	pr_warn("**                                                      **\n");
3205 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3206 	pr_warn("**********************************************************\n");
3207 
3208 	/* Expand the buffers to set size */
3209 	tracing_update_buffers();
3210 
3211 	buffers_allocated = 1;
3212 
3213 	/*
3214 	 * trace_printk_init_buffers() can be called by modules.
3215 	 * If that happens, then we need to start cmdline recording
3216 	 * directly here. If the global_trace.buffer is already
3217 	 * allocated here, then this was called by module code.
3218 	 */
3219 	if (global_trace.array_buffer.buffer)
3220 		tracing_start_cmdline_record();
3221 }
3222 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3223 
3224 void trace_printk_start_comm(void)
3225 {
3226 	/* Start tracing comms if trace printk is set */
3227 	if (!buffers_allocated)
3228 		return;
3229 	tracing_start_cmdline_record();
3230 }
3231 
3232 static void trace_printk_start_stop_comm(int enabled)
3233 {
3234 	if (!buffers_allocated)
3235 		return;
3236 
3237 	if (enabled)
3238 		tracing_start_cmdline_record();
3239 	else
3240 		tracing_stop_cmdline_record();
3241 }
3242 
3243 /**
3244  * trace_vbprintk - write binary msg to tracing buffer
3245  * @ip:    The address of the caller
3246  * @fmt:   The string format to write to the buffer
3247  * @args:  Arguments for @fmt
3248  */
3249 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3250 {
3251 	struct trace_event_call *call = &event_bprint;
3252 	struct ring_buffer_event *event;
3253 	struct trace_buffer *buffer;
3254 	struct trace_array *tr = &global_trace;
3255 	struct bprint_entry *entry;
3256 	unsigned long flags;
3257 	char *tbuffer;
3258 	int len = 0, size, pc;
3259 
3260 	if (unlikely(tracing_selftest_running || tracing_disabled))
3261 		return 0;
3262 
3263 	/* Don't pollute graph traces with trace_vprintk internals */
3264 	pause_graph_tracing();
3265 
3266 	pc = preempt_count();
3267 	preempt_disable_notrace();
3268 
3269 	tbuffer = get_trace_buf();
3270 	if (!tbuffer) {
3271 		len = 0;
3272 		goto out_nobuffer;
3273 	}
3274 
3275 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3276 
3277 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3278 		goto out_put;
3279 
3280 	local_save_flags(flags);
3281 	size = sizeof(*entry) + sizeof(u32) * len;
3282 	buffer = tr->array_buffer.buffer;
3283 	ring_buffer_nest_start(buffer);
3284 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3285 					    flags, pc);
3286 	if (!event)
3287 		goto out;
3288 	entry = ring_buffer_event_data(event);
3289 	entry->ip			= ip;
3290 	entry->fmt			= fmt;
3291 
3292 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3293 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3294 		__buffer_unlock_commit(buffer, event);
3295 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3296 	}
3297 
3298 out:
3299 	ring_buffer_nest_end(buffer);
3300 out_put:
3301 	put_trace_buf();
3302 
3303 out_nobuffer:
3304 	preempt_enable_notrace();
3305 	unpause_graph_tracing();
3306 
3307 	return len;
3308 }
3309 EXPORT_SYMBOL_GPL(trace_vbprintk);
3310 
3311 __printf(3, 0)
3312 static int
3313 __trace_array_vprintk(struct trace_buffer *buffer,
3314 		      unsigned long ip, const char *fmt, va_list args)
3315 {
3316 	struct trace_event_call *call = &event_print;
3317 	struct ring_buffer_event *event;
3318 	int len = 0, size, pc;
3319 	struct print_entry *entry;
3320 	unsigned long flags;
3321 	char *tbuffer;
3322 
3323 	if (tracing_disabled || tracing_selftest_running)
3324 		return 0;
3325 
3326 	/* Don't pollute graph traces with trace_vprintk internals */
3327 	pause_graph_tracing();
3328 
3329 	pc = preempt_count();
3330 	preempt_disable_notrace();
3331 
3332 
3333 	tbuffer = get_trace_buf();
3334 	if (!tbuffer) {
3335 		len = 0;
3336 		goto out_nobuffer;
3337 	}
3338 
3339 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3340 
3341 	local_save_flags(flags);
3342 	size = sizeof(*entry) + len + 1;
3343 	ring_buffer_nest_start(buffer);
3344 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3345 					    flags, pc);
3346 	if (!event)
3347 		goto out;
3348 	entry = ring_buffer_event_data(event);
3349 	entry->ip = ip;
3350 
3351 	memcpy(&entry->buf, tbuffer, len + 1);
3352 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3353 		__buffer_unlock_commit(buffer, event);
3354 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3355 	}
3356 
3357 out:
3358 	ring_buffer_nest_end(buffer);
3359 	put_trace_buf();
3360 
3361 out_nobuffer:
3362 	preempt_enable_notrace();
3363 	unpause_graph_tracing();
3364 
3365 	return len;
3366 }
3367 
3368 __printf(3, 0)
3369 int trace_array_vprintk(struct trace_array *tr,
3370 			unsigned long ip, const char *fmt, va_list args)
3371 {
3372 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3373 }
3374 
3375 /**
3376  * trace_array_printk - Print a message to a specific instance
3377  * @tr: The instance trace_array descriptor
3378  * @ip: The instruction pointer that this is called from.
3379  * @fmt: The format to print (printf format)
3380  *
3381  * If a subsystem sets up its own instance, they have the right to
3382  * printk strings into their tracing instance buffer using this
3383  * function. Note, this function will not write into the top level
3384  * buffer (use trace_printk() for that), as writing into the top level
3385  * buffer should only have events that can be individually disabled.
3386  * trace_printk() is only used for debugging a kernel, and should not
3387  * ever be incorporated into normal use.
3388  *
3389  * trace_array_printk() can be used, as it will not add noise to the
3390  * top level tracing buffer.
3391  *
3392  * Note, trace_array_init_printk() must be called on @tr before this
3393  * can be used.
3394  */
3395 __printf(3, 0)
3396 int trace_array_printk(struct trace_array *tr,
3397 		       unsigned long ip, const char *fmt, ...)
3398 {
3399 	int ret;
3400 	va_list ap;
3401 
3402 	if (!tr)
3403 		return -ENOENT;
3404 
3405 	/* This is only allowed for created instances */
3406 	if (tr == &global_trace)
3407 		return 0;
3408 
3409 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3410 		return 0;
3411 
3412 	va_start(ap, fmt);
3413 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3414 	va_end(ap);
3415 	return ret;
3416 }
3417 EXPORT_SYMBOL_GPL(trace_array_printk);
3418 
3419 /**
3420  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3421  * @tr: The trace array to initialize the buffers for
3422  *
3423  * As trace_array_printk() only writes into instances, they are OK to
3424  * have in the kernel (unlike trace_printk()). This needs to be called
3425  * before trace_array_printk() can be used on a trace_array.
3426  */
3427 int trace_array_init_printk(struct trace_array *tr)
3428 {
3429 	if (!tr)
3430 		return -ENOENT;
3431 
3432 	/* This is only allowed for created instances */
3433 	if (tr == &global_trace)
3434 		return -EINVAL;
3435 
3436 	return alloc_percpu_trace_buffer();
3437 }
3438 EXPORT_SYMBOL_GPL(trace_array_init_printk);
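
/*
 * Usage sketch (illustrative; the instance name is an assumption and the
 * lookup via trace_array_get_by_name() is one possible way to obtain @tr):
 * a subsystem printing into its own instance initializes the buffers once
 * and then writes only into that instance:
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_subsys");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello %d\n", 42);
 *
 * The reference taken by trace_array_get_by_name() is dropped later with
 * trace_array_put(tr).
 */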
3439 
3440 __printf(3, 4)
3441 int trace_array_printk_buf(struct trace_buffer *buffer,
3442 			   unsigned long ip, const char *fmt, ...)
3443 {
3444 	int ret;
3445 	va_list ap;
3446 
3447 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3448 		return 0;
3449 
3450 	va_start(ap, fmt);
3451 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3452 	va_end(ap);
3453 	return ret;
3454 }
3455 
3456 __printf(2, 0)
3457 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3458 {
3459 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3460 }
3461 EXPORT_SYMBOL_GPL(trace_vprintk);
3462 
3463 static void trace_iterator_increment(struct trace_iterator *iter)
3464 {
3465 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3466 
3467 	iter->idx++;
3468 	if (buf_iter)
3469 		ring_buffer_iter_advance(buf_iter);
3470 }
3471 
3472 static struct trace_entry *
3473 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3474 		unsigned long *lost_events)
3475 {
3476 	struct ring_buffer_event *event;
3477 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3478 
3479 	if (buf_iter) {
3480 		event = ring_buffer_iter_peek(buf_iter, ts);
3481 		if (lost_events)
3482 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3483 				(unsigned long)-1 : 0;
3484 	} else {
3485 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3486 					 lost_events);
3487 	}
3488 
3489 	if (event) {
3490 		iter->ent_size = ring_buffer_event_length(event);
3491 		return ring_buffer_event_data(event);
3492 	}
3493 	iter->ent_size = 0;
3494 	return NULL;
3495 }
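/*
 * When peeking through a non-consuming buf_iter, the ring buffer only
 * reports whether events were dropped, not how many, so *lost_events is
 * set to the sentinel (unsigned long)-1.  print_trace_line() renders
 * that sentinel as "[LOST EVENTS]" without a count.
 */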
3496 
3497 static struct trace_entry *
3498 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3499 		  unsigned long *missing_events, u64 *ent_ts)
3500 {
3501 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3502 	struct trace_entry *ent, *next = NULL;
3503 	unsigned long lost_events = 0, next_lost = 0;
3504 	int cpu_file = iter->cpu_file;
3505 	u64 next_ts = 0, ts;
3506 	int next_cpu = -1;
3507 	int next_size = 0;
3508 	int cpu;
3509 
3510 	/*
3511 	 * If we are in a per_cpu trace file, don't bother iterating over
3512 	 * all CPUs; peek at that CPU directly.
3513 	 */
3514 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3515 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3516 			return NULL;
3517 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3518 		if (ent_cpu)
3519 			*ent_cpu = cpu_file;
3520 
3521 		return ent;
3522 	}
3523 
3524 	for_each_tracing_cpu(cpu) {
3525 
3526 		if (ring_buffer_empty_cpu(buffer, cpu))
3527 			continue;
3528 
3529 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3530 
3531 		/*
3532 		 * Pick the entry with the smallest timestamp:
3533 		 */
3534 		if (ent && (!next || ts < next_ts)) {
3535 			next = ent;
3536 			next_cpu = cpu;
3537 			next_ts = ts;
3538 			next_lost = lost_events;
3539 			next_size = iter->ent_size;
3540 		}
3541 	}
3542 
3543 	iter->ent_size = next_size;
3544 
3545 	if (ent_cpu)
3546 		*ent_cpu = next_cpu;
3547 
3548 	if (ent_ts)
3549 		*ent_ts = next_ts;
3550 
3551 	if (missing_events)
3552 		*missing_events = next_lost;
3553 
3554 	return next;
3555 }
3556 
3557 #define STATIC_FMT_BUF_SIZE	128
3558 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3559 
3560 static char *trace_iter_expand_format(struct trace_iterator *iter)
3561 {
3562 	char *tmp;
3563 
3564 	if (iter->fmt == static_fmt_buf)
3565 		return NULL;
3566 
3567 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3568 		       GFP_KERNEL);
3569 	if (tmp) {
3570 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3571 		iter->fmt = tmp;
3572 	}
3573 
3574 	return tmp;
3575 }
3576 
3577 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3578 {
3579 	const char *p, *new_fmt;
3580 	char *q;
3581 
3582 	if (WARN_ON_ONCE(!fmt))
3583 		return fmt;
3584 
3585 	p = fmt;
3586 	new_fmt = q = iter->fmt;
3587 	while (*p) {
3588 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3589 			if (!trace_iter_expand_format(iter))
3590 				return fmt;
3591 
3592 			q += iter->fmt - new_fmt;
3593 			new_fmt = iter->fmt;
3594 		}
3595 
3596 		*q++ = *p++;
3597 
3598 		/* Replace %p with %px */
3599 		if (p[-1] == '%') {
3600 			if (p[0] == '%') {
3601 				*q++ = *p++;
3602 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3603 				*q++ = *p++;
3604 				*q++ = 'x';
3605 			}
3606 		}
3607 	}
3608 	*q = '\0';
3609 
3610 	return new_fmt;
3611 }
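/*
 * Example of the rewrite above (illustrative; the format string is
 * hypothetical):
 *
 *	trace_event_format(iter, "comm=%s ptr=%p sym=%pS lit=%%p\n")
 *
 * returns "comm=%s ptr=%px sym=%pS lit=%%p\n" -- only a bare %p is
 * turned into %px, while %pS and the literal %% sequence are left as-is.
 */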
3612 
3613 #define STATIC_TEMP_BUF_SIZE	128
3614 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3615 
3616 /* Find the next real entry, without updating the iterator itself */
3617 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3618 					  int *ent_cpu, u64 *ent_ts)
3619 {
3620 	/* __find_next_entry will reset ent_size */
3621 	int ent_size = iter->ent_size;
3622 	struct trace_entry *entry;
3623 
3624 	/*
3625 	 * If called from ftrace_dump(), then the iter->temp buffer
3626 	 * will be the static_temp_buf and not created from kmalloc.
3627 	 * If the entry size is greater than the buffer, we cannot
3628 	 * save it. Just return NULL in that case. This is only
3629 	 * used to add markers when two consecutive events' time
3630 	 * stamps have a large delta. See trace_print_lat_context().
3631 	 */
3632 	if (iter->temp == static_temp_buf &&
3633 	    STATIC_TEMP_BUF_SIZE < ent_size)
3634 		return NULL;
3635 
3636 	/*
3637 	 * The __find_next_entry() may call peek_next_entry(), which may
3638 	 * call ring_buffer_peek() that may make the contents of iter->ent
3639 	 * undefined. Need to copy iter->ent now.
3640 	 */
3641 	if (iter->ent && iter->ent != iter->temp) {
3642 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3643 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3644 			void *temp;
3645 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3646 			if (!temp)
3647 				return NULL;
3648 			kfree(iter->temp);
3649 			iter->temp = temp;
3650 			iter->temp_size = iter->ent_size;
3651 		}
3652 		memcpy(iter->temp, iter->ent, iter->ent_size);
3653 		iter->ent = iter->temp;
3654 	}
3655 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3656 	/* Put back the original ent_size */
3657 	iter->ent_size = ent_size;
3658 
3659 	return entry;
3660 }
3661 
3662 /* Find the next real entry, and increment the iterator to the next entry */
3663 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3664 {
3665 	iter->ent = __find_next_entry(iter, &iter->cpu,
3666 				      &iter->lost_events, &iter->ts);
3667 
3668 	if (iter->ent)
3669 		trace_iterator_increment(iter);
3670 
3671 	return iter->ent ? iter : NULL;
3672 }
3673 
3674 static void trace_consume(struct trace_iterator *iter)
3675 {
3676 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3677 			    &iter->lost_events);
3678 }
3679 
3680 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3681 {
3682 	struct trace_iterator *iter = m->private;
3683 	int i = (int)*pos;
3684 	void *ent;
3685 
3686 	WARN_ON_ONCE(iter->leftover);
3687 
3688 	(*pos)++;
3689 
3690 	/* can't go backwards */
3691 	if (iter->idx > i)
3692 		return NULL;
3693 
3694 	if (iter->idx < 0)
3695 		ent = trace_find_next_entry_inc(iter);
3696 	else
3697 		ent = iter;
3698 
3699 	while (ent && iter->idx < i)
3700 		ent = trace_find_next_entry_inc(iter);
3701 
3702 	iter->pos = *pos;
3703 
3704 	return ent;
3705 }
3706 
3707 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3708 {
3709 	struct ring_buffer_iter *buf_iter;
3710 	unsigned long entries = 0;
3711 	u64 ts;
3712 
3713 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3714 
3715 	buf_iter = trace_buffer_iter(iter, cpu);
3716 	if (!buf_iter)
3717 		return;
3718 
3719 	ring_buffer_iter_reset(buf_iter);
3720 
3721 	/*
3722 	 * With the max latency tracers we could have the case that
3723 	 * a reset never took place on a cpu. This is evidenced by
3724 	 * the timestamp being before the start of the buffer.
3725 	 */
3726 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3727 		if (ts >= iter->array_buffer->time_start)
3728 			break;
3729 		entries++;
3730 		ring_buffer_iter_advance(buf_iter);
3731 	}
3732 
3733 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3734 }
3735 
3736 /*
3737  * The current tracer is copied to avoid holding a global lock
3738  * all around.
3739  */
3740 static void *s_start(struct seq_file *m, loff_t *pos)
3741 {
3742 	struct trace_iterator *iter = m->private;
3743 	struct trace_array *tr = iter->tr;
3744 	int cpu_file = iter->cpu_file;
3745 	void *p = NULL;
3746 	loff_t l = 0;
3747 	int cpu;
3748 
3749 	/*
3750 	 * copy the tracer to avoid using a global lock all around.
3751 	 * iter->trace is a copy of current_trace, the pointer to the
3752 	 * name may be used instead of a strcmp(), as iter->trace->name
3753 	 * will point to the same string as current_trace->name.
3754 	 */
3755 	mutex_lock(&trace_types_lock);
3756 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
3757 		/* Close iter->trace before switching to the new current tracer */
3758 		if (iter->trace->close)
3759 			iter->trace->close(iter);
3760 		*iter->trace = *tr->current_trace;
3761 		/* Reopen the new current tracer */
3762 		if (iter->trace->open)
3763 			iter->trace->open(iter);
3764 	}
3765 	mutex_unlock(&trace_types_lock);
3766 
3767 #ifdef CONFIG_TRACER_MAX_TRACE
3768 	if (iter->snapshot && iter->trace->use_max_tr)
3769 		return ERR_PTR(-EBUSY);
3770 #endif
3771 
3772 	if (*pos != iter->pos) {
3773 		iter->ent = NULL;
3774 		iter->cpu = 0;
3775 		iter->idx = -1;
3776 
3777 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3778 			for_each_tracing_cpu(cpu)
3779 				tracing_iter_reset(iter, cpu);
3780 		} else
3781 			tracing_iter_reset(iter, cpu_file);
3782 
3783 		iter->leftover = 0;
3784 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3785 			;
3786 
3787 	} else {
3788 		/*
3789 		 * If we overflowed the seq_file before, then we want
3790 		 * to just reuse the trace_seq buffer again.
3791 		 */
3792 		if (iter->leftover)
3793 			p = iter;
3794 		else {
3795 			l = *pos - 1;
3796 			p = s_next(m, p, &l);
3797 		}
3798 	}
3799 
3800 	trace_event_read_lock();
3801 	trace_access_lock(cpu_file);
3802 	return p;
3803 }
3804 
3805 static void s_stop(struct seq_file *m, void *p)
3806 {
3807 	struct trace_iterator *iter = m->private;
3808 
3809 #ifdef CONFIG_TRACER_MAX_TRACE
3810 	if (iter->snapshot && iter->trace->use_max_tr)
3811 		return;
3812 #endif
3813 
3814 	trace_access_unlock(iter->cpu_file);
3815 	trace_event_read_unlock();
3816 }
3817 
3818 static void
3819 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3820 		      unsigned long *entries, int cpu)
3821 {
3822 	unsigned long count;
3823 
3824 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3825 	/*
3826 	 * If this buffer has skipped entries, then we hold all
3827 	 * entries for the trace and we need to ignore the
3828 	 * ones before the time stamp.
3829 	 */
3830 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3831 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3832 		/* total is the same as the entries */
3833 		*total = count;
3834 	} else
3835 		*total = count +
3836 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3837 	*entries = count;
3838 }
3839 
3840 static void
3841 get_total_entries(struct array_buffer *buf,
3842 		  unsigned long *total, unsigned long *entries)
3843 {
3844 	unsigned long t, e;
3845 	int cpu;
3846 
3847 	*total = 0;
3848 	*entries = 0;
3849 
3850 	for_each_tracing_cpu(cpu) {
3851 		get_total_entries_cpu(buf, &t, &e, cpu);
3852 		*total += t;
3853 		*entries += e;
3854 	}
3855 }
3856 
3857 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3858 {
3859 	unsigned long total, entries;
3860 
3861 	if (!tr)
3862 		tr = &global_trace;
3863 
3864 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3865 
3866 	return entries;
3867 }
3868 
3869 unsigned long trace_total_entries(struct trace_array *tr)
3870 {
3871 	unsigned long total, entries;
3872 
3873 	if (!tr)
3874 		tr = &global_trace;
3875 
3876 	get_total_entries(&tr->array_buffer, &total, &entries);
3877 
3878 	return entries;
3879 }
3880 
3881 static void print_lat_help_header(struct seq_file *m)
3882 {
3883 	seq_puts(m, "#                    _------=> CPU#            \n"
3884 		    "#                   / _-----=> irqs-off        \n"
3885 		    "#                  | / _----=> need-resched    \n"
3886 		    "#                  || / _---=> hardirq/softirq \n"
3887 		    "#                  ||| / _--=> preempt-depth   \n"
3888 		    "#                  |||| /     delay            \n"
3889 		    "#  cmd     pid     ||||| time  |   caller      \n"
3890 		    "#     \\   /        |||||  \\    |   /         \n");
3891 }
3892 
3893 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3894 {
3895 	unsigned long total;
3896 	unsigned long entries;
3897 
3898 	get_total_entries(buf, &total, &entries);
3899 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3900 		   entries, total, num_online_cpus());
3901 	seq_puts(m, "#\n");
3902 }
3903 
3904 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3905 				   unsigned int flags)
3906 {
3907 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3908 
3909 	print_event_info(buf, m);
3910 
3911 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3912 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
3913 }
3914 
3915 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3916 				       unsigned int flags)
3917 {
3918 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3919 	const char *space = "            ";
3920 	int prec = tgid ? 12 : 2;
3921 
3922 	print_event_info(buf, m);
3923 
3924 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
3925 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
3926 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
3927 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
3928 	seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
3929 	seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
3930 	seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
3931 }
3932 
3933 void
3934 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3935 {
3936 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3937 	struct array_buffer *buf = iter->array_buffer;
3938 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3939 	struct tracer *type = iter->trace;
3940 	unsigned long entries;
3941 	unsigned long total;
3942 	const char *name = "preemption";
3943 
3944 	name = type->name;
3945 
3946 	get_total_entries(buf, &total, &entries);
3947 
3948 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3949 		   name, UTS_RELEASE);
3950 	seq_puts(m, "# -----------------------------------"
3951 		 "---------------------------------\n");
3952 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3953 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3954 		   nsecs_to_usecs(data->saved_latency),
3955 		   entries,
3956 		   total,
3957 		   buf->cpu,
3958 #if defined(CONFIG_PREEMPT_NONE)
3959 		   "server",
3960 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3961 		   "desktop",
3962 #elif defined(CONFIG_PREEMPT)
3963 		   "preempt",
3964 #elif defined(CONFIG_PREEMPT_RT)
3965 		   "preempt_rt",
3966 #else
3967 		   "unknown",
3968 #endif
3969 		   /* These are reserved for later use */
3970 		   0, 0, 0, 0);
3971 #ifdef CONFIG_SMP
3972 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3973 #else
3974 	seq_puts(m, ")\n");
3975 #endif
3976 	seq_puts(m, "#    -----------------\n");
3977 	seq_printf(m, "#    | task: %.16s-%d "
3978 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3979 		   data->comm, data->pid,
3980 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3981 		   data->policy, data->rt_priority);
3982 	seq_puts(m, "#    -----------------\n");
3983 
3984 	if (data->critical_start) {
3985 		seq_puts(m, "#  => started at: ");
3986 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3987 		trace_print_seq(m, &iter->seq);
3988 		seq_puts(m, "\n#  => ended at:   ");
3989 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3990 		trace_print_seq(m, &iter->seq);
3991 		seq_puts(m, "\n#\n");
3992 	}
3993 
3994 	seq_puts(m, "#\n");
3995 }
3996 
3997 static void test_cpu_buff_start(struct trace_iterator *iter)
3998 {
3999 	struct trace_seq *s = &iter->seq;
4000 	struct trace_array *tr = iter->tr;
4001 
4002 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4003 		return;
4004 
4005 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4006 		return;
4007 
4008 	if (cpumask_available(iter->started) &&
4009 	    cpumask_test_cpu(iter->cpu, iter->started))
4010 		return;
4011 
4012 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4013 		return;
4014 
4015 	if (cpumask_available(iter->started))
4016 		cpumask_set_cpu(iter->cpu, iter->started);
4017 
4018 	/* Don't print started cpu buffer for the first entry of the trace */
4019 	if (iter->idx > 1)
4020 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4021 				iter->cpu);
4022 }
4023 
4024 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4025 {
4026 	struct trace_array *tr = iter->tr;
4027 	struct trace_seq *s = &iter->seq;
4028 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4029 	struct trace_entry *entry;
4030 	struct trace_event *event;
4031 
4032 	entry = iter->ent;
4033 
4034 	test_cpu_buff_start(iter);
4035 
4036 	event = ftrace_find_event(entry->type);
4037 
4038 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4039 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4040 			trace_print_lat_context(iter);
4041 		else
4042 			trace_print_context(iter);
4043 	}
4044 
4045 	if (trace_seq_has_overflowed(s))
4046 		return TRACE_TYPE_PARTIAL_LINE;
4047 
4048 	if (event)
4049 		return event->funcs->trace(iter, sym_flags, event);
4050 
4051 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4052 
4053 	return trace_handle_return(s);
4054 }
4055 
4056 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4057 {
4058 	struct trace_array *tr = iter->tr;
4059 	struct trace_seq *s = &iter->seq;
4060 	struct trace_entry *entry;
4061 	struct trace_event *event;
4062 
4063 	entry = iter->ent;
4064 
4065 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4066 		trace_seq_printf(s, "%d %d %llu ",
4067 				 entry->pid, iter->cpu, iter->ts);
4068 
4069 	if (trace_seq_has_overflowed(s))
4070 		return TRACE_TYPE_PARTIAL_LINE;
4071 
4072 	event = ftrace_find_event(entry->type);
4073 	if (event)
4074 		return event->funcs->raw(iter, 0, event);
4075 
4076 	trace_seq_printf(s, "%d ?\n", entry->type);
4077 
4078 	return trace_handle_return(s);
4079 }
4080 
4081 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4082 {
4083 	struct trace_array *tr = iter->tr;
4084 	struct trace_seq *s = &iter->seq;
4085 	unsigned char newline = '\n';
4086 	struct trace_entry *entry;
4087 	struct trace_event *event;
4088 
4089 	entry = iter->ent;
4090 
4091 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4092 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4093 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4094 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4095 		if (trace_seq_has_overflowed(s))
4096 			return TRACE_TYPE_PARTIAL_LINE;
4097 	}
4098 
4099 	event = ftrace_find_event(entry->type);
4100 	if (event) {
4101 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4102 		if (ret != TRACE_TYPE_HANDLED)
4103 			return ret;
4104 	}
4105 
4106 	SEQ_PUT_FIELD(s, newline);
4107 
4108 	return trace_handle_return(s);
4109 }
4110 
4111 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4112 {
4113 	struct trace_array *tr = iter->tr;
4114 	struct trace_seq *s = &iter->seq;
4115 	struct trace_entry *entry;
4116 	struct trace_event *event;
4117 
4118 	entry = iter->ent;
4119 
4120 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4121 		SEQ_PUT_FIELD(s, entry->pid);
4122 		SEQ_PUT_FIELD(s, iter->cpu);
4123 		SEQ_PUT_FIELD(s, iter->ts);
4124 		if (trace_seq_has_overflowed(s))
4125 			return TRACE_TYPE_PARTIAL_LINE;
4126 	}
4127 
4128 	event = ftrace_find_event(entry->type);
4129 	return event ? event->funcs->binary(iter, 0, event) :
4130 		TRACE_TYPE_HANDLED;
4131 }
4132 
4133 int trace_empty(struct trace_iterator *iter)
4134 {
4135 	struct ring_buffer_iter *buf_iter;
4136 	int cpu;
4137 
4138 	/* If we are looking at one CPU buffer, only check that one */
4139 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4140 		cpu = iter->cpu_file;
4141 		buf_iter = trace_buffer_iter(iter, cpu);
4142 		if (buf_iter) {
4143 			if (!ring_buffer_iter_empty(buf_iter))
4144 				return 0;
4145 		} else {
4146 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4147 				return 0;
4148 		}
4149 		return 1;
4150 	}
4151 
4152 	for_each_tracing_cpu(cpu) {
4153 		buf_iter = trace_buffer_iter(iter, cpu);
4154 		if (buf_iter) {
4155 			if (!ring_buffer_iter_empty(buf_iter))
4156 				return 0;
4157 		} else {
4158 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4159 				return 0;
4160 		}
4161 	}
4162 
4163 	return 1;
4164 }
4165 
4166 /*  Called with trace_event_read_lock() held. */
4167 enum print_line_t print_trace_line(struct trace_iterator *iter)
4168 {
4169 	struct trace_array *tr = iter->tr;
4170 	unsigned long trace_flags = tr->trace_flags;
4171 	enum print_line_t ret;
4172 
4173 	if (iter->lost_events) {
4174 		if (iter->lost_events == (unsigned long)-1)
4175 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4176 					 iter->cpu);
4177 		else
4178 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4179 					 iter->cpu, iter->lost_events);
4180 		if (trace_seq_has_overflowed(&iter->seq))
4181 			return TRACE_TYPE_PARTIAL_LINE;
4182 	}
4183 
4184 	if (iter->trace && iter->trace->print_line) {
4185 		ret = iter->trace->print_line(iter);
4186 		if (ret != TRACE_TYPE_UNHANDLED)
4187 			return ret;
4188 	}
4189 
4190 	if (iter->ent->type == TRACE_BPUTS &&
4191 			trace_flags & TRACE_ITER_PRINTK &&
4192 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4193 		return trace_print_bputs_msg_only(iter);
4194 
4195 	if (iter->ent->type == TRACE_BPRINT &&
4196 			trace_flags & TRACE_ITER_PRINTK &&
4197 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4198 		return trace_print_bprintk_msg_only(iter);
4199 
4200 	if (iter->ent->type == TRACE_PRINT &&
4201 			trace_flags & TRACE_ITER_PRINTK &&
4202 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4203 		return trace_print_printk_msg_only(iter);
4204 
4205 	if (trace_flags & TRACE_ITER_BIN)
4206 		return print_bin_fmt(iter);
4207 
4208 	if (trace_flags & TRACE_ITER_HEX)
4209 		return print_hex_fmt(iter);
4210 
4211 	if (trace_flags & TRACE_ITER_RAW)
4212 		return print_raw_fmt(iter);
4213 
4214 	return print_trace_fmt(iter);
4215 }
4216 
4217 void trace_latency_header(struct seq_file *m)
4218 {
4219 	struct trace_iterator *iter = m->private;
4220 	struct trace_array *tr = iter->tr;
4221 
4222 	/* print nothing if the buffers are empty */
4223 	if (trace_empty(iter))
4224 		return;
4225 
4226 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4227 		print_trace_header(m, iter);
4228 
4229 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4230 		print_lat_help_header(m);
4231 }
4232 
4233 void trace_default_header(struct seq_file *m)
4234 {
4235 	struct trace_iterator *iter = m->private;
4236 	struct trace_array *tr = iter->tr;
4237 	unsigned long trace_flags = tr->trace_flags;
4238 
4239 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4240 		return;
4241 
4242 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4243 		/* print nothing if the buffers are empty */
4244 		if (trace_empty(iter))
4245 			return;
4246 		print_trace_header(m, iter);
4247 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4248 			print_lat_help_header(m);
4249 	} else {
4250 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4251 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4252 				print_func_help_header_irq(iter->array_buffer,
4253 							   m, trace_flags);
4254 			else
4255 				print_func_help_header(iter->array_buffer, m,
4256 						       trace_flags);
4257 		}
4258 	}
4259 }
4260 
4261 static void test_ftrace_alive(struct seq_file *m)
4262 {
4263 	if (!ftrace_is_dead())
4264 		return;
4265 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4266 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4267 }
4268 
4269 #ifdef CONFIG_TRACER_MAX_TRACE
4270 static void show_snapshot_main_help(struct seq_file *m)
4271 {
4272 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4273 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4274 		    "#                      Takes a snapshot of the main buffer.\n"
4275 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4276 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4277 		    "#                       is not a '0' or '1')\n");
4278 }
4279 
4280 static void show_snapshot_percpu_help(struct seq_file *m)
4281 {
4282 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4283 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4284 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4285 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4286 #else
4287 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4288 		    "#                     Must use main snapshot file to allocate.\n");
4289 #endif
4290 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4291 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4292 		    "#                       is not a '0' or '1')\n");
4293 }
4294 
4295 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4296 {
4297 	if (iter->tr->allocated_snapshot)
4298 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4299 	else
4300 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4301 
4302 	seq_puts(m, "# Snapshot commands:\n");
4303 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4304 		show_snapshot_main_help(m);
4305 	else
4306 		show_snapshot_percpu_help(m);
4307 }
4308 #else
4309 /* Should never be called */
4310 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4311 #endif
4312 
4313 static int s_show(struct seq_file *m, void *v)
4314 {
4315 	struct trace_iterator *iter = v;
4316 	int ret;
4317 
4318 	if (iter->ent == NULL) {
4319 		if (iter->tr) {
4320 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4321 			seq_puts(m, "#\n");
4322 			test_ftrace_alive(m);
4323 		}
4324 		if (iter->snapshot && trace_empty(iter))
4325 			print_snapshot_help(m, iter);
4326 		else if (iter->trace && iter->trace->print_header)
4327 			iter->trace->print_header(m);
4328 		else
4329 			trace_default_header(m);
4330 
4331 	} else if (iter->leftover) {
4332 		/*
4333 		 * If we filled the seq_file buffer earlier, we
4334 		 * want to just show it now.
4335 		 */
4336 		ret = trace_print_seq(m, &iter->seq);
4337 
4338 		/* ret should this time be zero, but you never know */
4339 		iter->leftover = ret;
4340 
4341 	} else {
4342 		ret = print_trace_line(iter);
4343 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4344 			iter->seq.full = 0;
4345 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4346 		}
4347 		ret = trace_print_seq(m, &iter->seq);
4348 		/*
4349 		 * If we overflow the seq_file buffer, then it will
4350 		 * ask us for this data again at start up.
4351 		 * Use that instead.
4352 		 *  ret is 0 if seq_file write succeeded.
4353 		 *        -1 otherwise.
4354 		 */
4355 		iter->leftover = ret;
4356 	}
4357 
4358 	return 0;
4359 }
4360 
4361 /*
4362  * Should be used after trace_array_get(), trace_types_lock
4363  * ensures that i_cdev was already initialized.
4364  */
4365 static inline int tracing_get_cpu(struct inode *inode)
4366 {
4367 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4368 		return (long)inode->i_cdev - 1;
4369 	return RING_BUFFER_ALL_CPUS;
4370 }
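/*
 * Example of the encoding (see trace_create_cpu_file(), which stores
 * cpu + 1 in i_cdev): the per_cpu/cpu3 "trace" file has i_cdev == 4 and
 * this returns 3, while the top level files leave i_cdev NULL and get
 * RING_BUFFER_ALL_CPUS.
 */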
4371 
4372 static const struct seq_operations tracer_seq_ops = {
4373 	.start		= s_start,
4374 	.next		= s_next,
4375 	.stop		= s_stop,
4376 	.show		= s_show,
4377 };
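/*
 * Reads of the "trace" file are driven by these callbacks: s_start()
 * positions the iterator and takes the event read lock plus the per-cpu
 * access lock, s_next()/s_show() walk and print one entry at a time,
 * and s_stop() drops the locks taken in s_start().
 */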
4378 
4379 static struct trace_iterator *
4380 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4381 {
4382 	struct trace_array *tr = inode->i_private;
4383 	struct trace_iterator *iter;
4384 	int cpu;
4385 
4386 	if (tracing_disabled)
4387 		return ERR_PTR(-ENODEV);
4388 
4389 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4390 	if (!iter)
4391 		return ERR_PTR(-ENOMEM);
4392 
4393 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4394 				    GFP_KERNEL);
4395 	if (!iter->buffer_iter)
4396 		goto release;
4397 
4398 	/*
4399 	 * trace_find_next_entry() may need to save off iter->ent.
4400 	 * It will place it into the iter->temp buffer. As most
4401 	 * events are less than 128 bytes, allocate a buffer of that size.
4402 	 * If one is greater, then trace_find_next_entry() will
4403 	 * allocate a new buffer to adjust for the bigger iter->ent.
4404 	 * It's not critical if it fails to get allocated here.
4405 	 */
4406 	iter->temp = kmalloc(128, GFP_KERNEL);
4407 	if (iter->temp)
4408 		iter->temp_size = 128;
4409 
4410 	/*
4411 	 * trace_event_printf() may need to modify the given format
4412 	 * string to replace %p with %px so that it shows the real address
4413 	 * instead of a hash value. However, that is only needed for event
4414 	 * tracing; other tracers may not need it. Defer the allocation
4415 	 * until it is needed.
4416 	 */
4417 	iter->fmt = NULL;
4418 	iter->fmt_size = 0;
4419 
4420 	/*
4421 	 * We make a copy of the current tracer to avoid concurrent
4422 	 * changes on it while we are reading.
4423 	 */
4424 	mutex_lock(&trace_types_lock);
4425 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4426 	if (!iter->trace)
4427 		goto fail;
4428 
4429 	*iter->trace = *tr->current_trace;
4430 
4431 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4432 		goto fail;
4433 
4434 	iter->tr = tr;
4435 
4436 #ifdef CONFIG_TRACER_MAX_TRACE
4437 	/* Currently only the top directory has a snapshot */
4438 	if (tr->current_trace->print_max || snapshot)
4439 		iter->array_buffer = &tr->max_buffer;
4440 	else
4441 #endif
4442 		iter->array_buffer = &tr->array_buffer;
4443 	iter->snapshot = snapshot;
4444 	iter->pos = -1;
4445 	iter->cpu_file = tracing_get_cpu(inode);
4446 	mutex_init(&iter->mutex);
4447 
4448 	/* Notify the tracer early; before we stop tracing. */
4449 	if (iter->trace->open)
4450 		iter->trace->open(iter);
4451 
4452 	/* Annotate start of buffers if we had overruns */
4453 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4454 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4455 
4456 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4457 	if (trace_clocks[tr->clock_id].in_ns)
4458 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4459 
4460 	/*
4461 	 * If pause-on-trace is enabled, then stop the trace while
4462 	 * dumping, unless this is the "snapshot" file
4463 	 */
4464 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4465 		tracing_stop_tr(tr);
4466 
4467 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4468 		for_each_tracing_cpu(cpu) {
4469 			iter->buffer_iter[cpu] =
4470 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4471 							 cpu, GFP_KERNEL);
4472 		}
4473 		ring_buffer_read_prepare_sync();
4474 		for_each_tracing_cpu(cpu) {
4475 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4476 			tracing_iter_reset(iter, cpu);
4477 		}
4478 	} else {
4479 		cpu = iter->cpu_file;
4480 		iter->buffer_iter[cpu] =
4481 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4482 						 cpu, GFP_KERNEL);
4483 		ring_buffer_read_prepare_sync();
4484 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4485 		tracing_iter_reset(iter, cpu);
4486 	}
4487 
4488 	mutex_unlock(&trace_types_lock);
4489 
4490 	return iter;
4491 
4492  fail:
4493 	mutex_unlock(&trace_types_lock);
4494 	kfree(iter->trace);
4495 	kfree(iter->temp);
4496 	kfree(iter->buffer_iter);
4497 release:
4498 	seq_release_private(inode, file);
4499 	return ERR_PTR(-ENOMEM);
4500 }
4501 
4502 int tracing_open_generic(struct inode *inode, struct file *filp)
4503 {
4504 	int ret;
4505 
4506 	ret = tracing_check_open_get_tr(NULL);
4507 	if (ret)
4508 		return ret;
4509 
4510 	filp->private_data = inode->i_private;
4511 	return 0;
4512 }
4513 
4514 bool tracing_is_disabled(void)
4515 {
4516 	return tracing_disabled;
4517 }
4518 
4519 /*
4520  * Open and update trace_array ref count.
4521  * Must have the current trace_array passed to it.
4522  */
4523 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4524 {
4525 	struct trace_array *tr = inode->i_private;
4526 	int ret;
4527 
4528 	ret = tracing_check_open_get_tr(tr);
4529 	if (ret)
4530 		return ret;
4531 
4532 	filp->private_data = inode->i_private;
4533 
4534 	return 0;
4535 }
4536 
4537 /*
4538  * The private pointer of the inode is the trace_event_file.
4539  * Update the tr ref count associated to it.
4540  */
4541 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4542 {
4543 	struct trace_event_file *file = inode->i_private;
4544 	int ret;
4545 
4546 	ret = tracing_check_open_get_tr(file->tr);
4547 	if (ret)
4548 		return ret;
4549 
4550 	mutex_lock(&event_mutex);
4551 
4552 	/* Fail if the file is marked for removal */
4553 	if (file->flags & EVENT_FILE_FL_FREED) {
4554 		trace_array_put(file->tr);
4555 		ret = -ENODEV;
4556 	} else {
4557 		event_file_get(file);
4558 	}
4559 
4560 	mutex_unlock(&event_mutex);
4561 	if (ret)
4562 		return ret;
4563 
4564 	filp->private_data = inode->i_private;
4565 
4566 	return 0;
4567 }
4568 
4569 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4570 {
4571 	struct trace_event_file *file = inode->i_private;
4572 
4573 	trace_array_put(file->tr);
4574 	event_file_put(file);
4575 
4576 	return 0;
4577 }
4578 
4579 static int tracing_release(struct inode *inode, struct file *file)
4580 {
4581 	struct trace_array *tr = inode->i_private;
4582 	struct seq_file *m = file->private_data;
4583 	struct trace_iterator *iter;
4584 	int cpu;
4585 
4586 	if (!(file->f_mode & FMODE_READ)) {
4587 		trace_array_put(tr);
4588 		return 0;
4589 	}
4590 
4591 	/* Writes do not use seq_file */
4592 	iter = m->private;
4593 	mutex_lock(&trace_types_lock);
4594 
4595 	for_each_tracing_cpu(cpu) {
4596 		if (iter->buffer_iter[cpu])
4597 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4598 	}
4599 
4600 	if (iter->trace && iter->trace->close)
4601 		iter->trace->close(iter);
4602 
4603 	if (!iter->snapshot && tr->stop_count)
4604 		/* reenable tracing if it was previously enabled */
4605 		tracing_start_tr(tr);
4606 
4607 	__trace_array_put(tr);
4608 
4609 	mutex_unlock(&trace_types_lock);
4610 
4611 	mutex_destroy(&iter->mutex);
4612 	free_cpumask_var(iter->started);
4613 	kfree(iter->fmt);
4614 	kfree(iter->temp);
4615 	kfree(iter->trace);
4616 	kfree(iter->buffer_iter);
4617 	seq_release_private(inode, file);
4618 
4619 	return 0;
4620 }
4621 
4622 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4623 {
4624 	struct trace_array *tr = inode->i_private;
4625 
4626 	trace_array_put(tr);
4627 	return 0;
4628 }
4629 
4630 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4631 {
4632 	struct trace_array *tr = inode->i_private;
4633 
4634 	trace_array_put(tr);
4635 
4636 	return single_release(inode, file);
4637 }
4638 
4639 static int tracing_open(struct inode *inode, struct file *file)
4640 {
4641 	struct trace_array *tr = inode->i_private;
4642 	struct trace_iterator *iter;
4643 	int ret;
4644 
4645 	ret = tracing_check_open_get_tr(tr);
4646 	if (ret)
4647 		return ret;
4648 
4649 	/* If this file was open for write, then erase contents */
4650 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4651 		int cpu = tracing_get_cpu(inode);
4652 		struct array_buffer *trace_buf = &tr->array_buffer;
4653 
4654 #ifdef CONFIG_TRACER_MAX_TRACE
4655 		if (tr->current_trace->print_max)
4656 			trace_buf = &tr->max_buffer;
4657 #endif
4658 
4659 		if (cpu == RING_BUFFER_ALL_CPUS)
4660 			tracing_reset_online_cpus(trace_buf);
4661 		else
4662 			tracing_reset_cpu(trace_buf, cpu);
4663 	}
4664 
4665 	if (file->f_mode & FMODE_READ) {
4666 		iter = __tracing_open(inode, file, false);
4667 		if (IS_ERR(iter))
4668 			ret = PTR_ERR(iter);
4669 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4670 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4671 	}
4672 
4673 	if (ret < 0)
4674 		trace_array_put(tr);
4675 
4676 	return ret;
4677 }
4678 
4679 /*
4680  * Some tracers are not suitable for instance buffers.
4681  * A tracer is always available for the global array (toplevel),
4682  * and is available for an instance only if it explicitly states so.
4683  */
4684 static bool
4685 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4686 {
4687 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4688 }
4689 
4690 /* Find the next tracer that this trace array may use */
4691 static struct tracer *
4692 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4693 {
4694 	while (t && !trace_ok_for_array(t, tr))
4695 		t = t->next;
4696 
4697 	return t;
4698 }
4699 
4700 static void *
4701 t_next(struct seq_file *m, void *v, loff_t *pos)
4702 {
4703 	struct trace_array *tr = m->private;
4704 	struct tracer *t = v;
4705 
4706 	(*pos)++;
4707 
4708 	if (t)
4709 		t = get_tracer_for_array(tr, t->next);
4710 
4711 	return t;
4712 }
4713 
4714 static void *t_start(struct seq_file *m, loff_t *pos)
4715 {
4716 	struct trace_array *tr = m->private;
4717 	struct tracer *t;
4718 	loff_t l = 0;
4719 
4720 	mutex_lock(&trace_types_lock);
4721 
4722 	t = get_tracer_for_array(tr, trace_types);
4723 	for (; t && l < *pos; t = t_next(m, t, &l))
4724 			;
4725 
4726 	return t;
4727 }
4728 
4729 static void t_stop(struct seq_file *m, void *p)
4730 {
4731 	mutex_unlock(&trace_types_lock);
4732 }
4733 
4734 static int t_show(struct seq_file *m, void *v)
4735 {
4736 	struct tracer *t = v;
4737 
4738 	if (!t)
4739 		return 0;
4740 
4741 	seq_puts(m, t->name);
4742 	if (t->next)
4743 		seq_putc(m, ' ');
4744 	else
4745 		seq_putc(m, '\n');
4746 
4747 	return 0;
4748 }
4749 
4750 static const struct seq_operations show_traces_seq_ops = {
4751 	.start		= t_start,
4752 	.next		= t_next,
4753 	.stop		= t_stop,
4754 	.show		= t_show,
4755 };
4756 
4757 static int show_traces_open(struct inode *inode, struct file *file)
4758 {
4759 	struct trace_array *tr = inode->i_private;
4760 	struct seq_file *m;
4761 	int ret;
4762 
4763 	ret = tracing_check_open_get_tr(tr);
4764 	if (ret)
4765 		return ret;
4766 
4767 	ret = seq_open(file, &show_traces_seq_ops);
4768 	if (ret) {
4769 		trace_array_put(tr);
4770 		return ret;
4771 	}
4772 
4773 	m = file->private_data;
4774 	m->private = tr;
4775 
4776 	return 0;
4777 }
4778 
4779 static int show_traces_release(struct inode *inode, struct file *file)
4780 {
4781 	struct trace_array *tr = inode->i_private;
4782 
4783 	trace_array_put(tr);
4784 	return seq_release(inode, file);
4785 }
4786 
4787 static ssize_t
4788 tracing_write_stub(struct file *filp, const char __user *ubuf,
4789 		   size_t count, loff_t *ppos)
4790 {
4791 	return count;
4792 }
4793 
4794 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4795 {
4796 	int ret;
4797 
4798 	if (file->f_mode & FMODE_READ)
4799 		ret = seq_lseek(file, offset, whence);
4800 	else
4801 		file->f_pos = ret = 0;
4802 
4803 	return ret;
4804 }
4805 
4806 static const struct file_operations tracing_fops = {
4807 	.open		= tracing_open,
4808 	.read		= seq_read,
4809 	.read_iter	= seq_read_iter,
4810 	.splice_read	= generic_file_splice_read,
4811 	.write		= tracing_write_stub,
4812 	.llseek		= tracing_lseek,
4813 	.release	= tracing_release,
4814 };
4815 
4816 static const struct file_operations show_traces_fops = {
4817 	.open		= show_traces_open,
4818 	.read		= seq_read,
4819 	.llseek		= seq_lseek,
4820 	.release	= show_traces_release,
4821 };
4822 
4823 static ssize_t
4824 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4825 		     size_t count, loff_t *ppos)
4826 {
4827 	struct trace_array *tr = file_inode(filp)->i_private;
4828 	char *mask_str;
4829 	int len;
4830 
4831 	len = snprintf(NULL, 0, "%*pb\n",
4832 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4833 	mask_str = kmalloc(len, GFP_KERNEL);
4834 	if (!mask_str)
4835 		return -ENOMEM;
4836 
4837 	len = snprintf(mask_str, len, "%*pb\n",
4838 		       cpumask_pr_args(tr->tracing_cpumask));
4839 	if (len >= count) {
4840 		count = -EINVAL;
4841 		goto out_err;
4842 	}
4843 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4844 
4845 out_err:
4846 	kfree(mask_str);
4847 
4848 	return count;
4849 }
4850 
4851 int tracing_set_cpumask(struct trace_array *tr,
4852 			cpumask_var_t tracing_cpumask_new)
4853 {
4854 	int cpu;
4855 
4856 	if (!tr)
4857 		return -EINVAL;
4858 
4859 	local_irq_disable();
4860 	arch_spin_lock(&tr->max_lock);
4861 	for_each_tracing_cpu(cpu) {
4862 		/*
4863 		 * Increase/decrease the disabled counter if we are
4864 		 * about to flip a bit in the cpumask:
4865 		 */
4866 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4867 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4868 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4869 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4870 #ifdef CONFIG_TRACER_MAX_TRACE
4871 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
4872 #endif
4873 		}
4874 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4875 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4876 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4877 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4878 #ifdef CONFIG_TRACER_MAX_TRACE
4879 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
4880 #endif
4881 		}
4882 	}
4883 	arch_spin_unlock(&tr->max_lock);
4884 	local_irq_enable();
4885 
4886 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4887 
4888 	return 0;
4889 }
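/*
 * Illustrative sketch of restricting tracing to CPU 0 from kernel code.
 * This example function is hypothetical; the cpumask helpers and
 * tracing_set_cpumask() above are existing APIs, and the mask can be
 * freed afterwards because tracing_set_cpumask() copies it.
 */
static __maybe_unused int tracing_only_cpu0_example(struct trace_array *tr)
{
	cpumask_var_t new_mask;
	int ret;

	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_clear(new_mask);
	cpumask_set_cpu(0, new_mask);

	ret = tracing_set_cpumask(tr, new_mask);

	free_cpumask_var(new_mask);
	return ret;
}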
4890 
4891 static ssize_t
4892 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4893 		      size_t count, loff_t *ppos)
4894 {
4895 	struct trace_array *tr = file_inode(filp)->i_private;
4896 	cpumask_var_t tracing_cpumask_new;
4897 	int err;
4898 
4899 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4900 		return -ENOMEM;
4901 
4902 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4903 	if (err)
4904 		goto err_free;
4905 
4906 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
4907 	if (err)
4908 		goto err_free;
4909 
4910 	free_cpumask_var(tracing_cpumask_new);
4911 
4912 	return count;
4913 
4914 err_free:
4915 	free_cpumask_var(tracing_cpumask_new);
4916 
4917 	return err;
4918 }
4919 
4920 static const struct file_operations tracing_cpumask_fops = {
4921 	.open		= tracing_open_generic_tr,
4922 	.read		= tracing_cpumask_read,
4923 	.write		= tracing_cpumask_write,
4924 	.release	= tracing_release_generic_tr,
4925 	.llseek		= generic_file_llseek,
4926 };
4927 
4928 static int tracing_trace_options_show(struct seq_file *m, void *v)
4929 {
4930 	struct tracer_opt *trace_opts;
4931 	struct trace_array *tr = m->private;
4932 	u32 tracer_flags;
4933 	int i;
4934 
4935 	mutex_lock(&trace_types_lock);
4936 	tracer_flags = tr->current_trace->flags->val;
4937 	trace_opts = tr->current_trace->flags->opts;
4938 
4939 	for (i = 0; trace_options[i]; i++) {
4940 		if (tr->trace_flags & (1 << i))
4941 			seq_printf(m, "%s\n", trace_options[i]);
4942 		else
4943 			seq_printf(m, "no%s\n", trace_options[i]);
4944 	}
4945 
4946 	for (i = 0; trace_opts[i].name; i++) {
4947 		if (tracer_flags & trace_opts[i].bit)
4948 			seq_printf(m, "%s\n", trace_opts[i].name);
4949 		else
4950 			seq_printf(m, "no%s\n", trace_opts[i].name);
4951 	}
4952 	mutex_unlock(&trace_types_lock);
4953 
4954 	return 0;
4955 }
4956 
4957 static int __set_tracer_option(struct trace_array *tr,
4958 			       struct tracer_flags *tracer_flags,
4959 			       struct tracer_opt *opts, int neg)
4960 {
4961 	struct tracer *trace = tracer_flags->trace;
4962 	int ret;
4963 
4964 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4965 	if (ret)
4966 		return ret;
4967 
4968 	if (neg)
4969 		tracer_flags->val &= ~opts->bit;
4970 	else
4971 		tracer_flags->val |= opts->bit;
4972 	return 0;
4973 }
4974 
4975 /* Try to assign a tracer specific option */
4976 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4977 {
4978 	struct tracer *trace = tr->current_trace;
4979 	struct tracer_flags *tracer_flags = trace->flags;
4980 	struct tracer_opt *opts = NULL;
4981 	int i;
4982 
4983 	for (i = 0; tracer_flags->opts[i].name; i++) {
4984 		opts = &tracer_flags->opts[i];
4985 
4986 		if (strcmp(cmp, opts->name) == 0)
4987 			return __set_tracer_option(tr, trace->flags, opts, neg);
4988 	}
4989 
4990 	return -EINVAL;
4991 }
4992 
4993 /* Some tracers require overwrite to stay enabled */
4994 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4995 {
4996 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4997 		return -1;
4998 
4999 	return 0;
5000 }
5001 
5002 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5003 {
5004 	int *map;
5005 
5006 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5007 	    (mask == TRACE_ITER_RECORD_CMD))
5008 		lockdep_assert_held(&event_mutex);
5009 
5010 	/* do nothing if flag is already set */
5011 	if (!!(tr->trace_flags & mask) == !!enabled)
5012 		return 0;
5013 
5014 	/* Give the tracer a chance to approve the change */
5015 	if (tr->current_trace->flag_changed)
5016 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5017 			return -EINVAL;
5018 
5019 	if (enabled)
5020 		tr->trace_flags |= mask;
5021 	else
5022 		tr->trace_flags &= ~mask;
5023 
5024 	if (mask == TRACE_ITER_RECORD_CMD)
5025 		trace_event_enable_cmd_record(enabled);
5026 
5027 	if (mask == TRACE_ITER_RECORD_TGID) {
5028 		if (!tgid_map) {
5029 			tgid_map_max = pid_max;
5030 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5031 				       GFP_KERNEL);
5032 
5033 			/*
5034 			 * Pairs with smp_load_acquire() in
5035 			 * trace_find_tgid_ptr() to ensure that if it observes
5036 			 * the tgid_map we just allocated then it also observes
5037 			 * the corresponding tgid_map_max value.
5038 			 */
5039 			smp_store_release(&tgid_map, map);
5040 		}
5041 		if (!tgid_map) {
5042 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5043 			return -ENOMEM;
5044 		}
5045 
5046 		trace_event_enable_tgid_record(enabled);
5047 	}
5048 
5049 	if (mask == TRACE_ITER_EVENT_FORK)
5050 		trace_event_follow_fork(tr, enabled);
5051 
5052 	if (mask == TRACE_ITER_FUNC_FORK)
5053 		ftrace_pid_follow_fork(tr, enabled);
5054 
5055 	if (mask == TRACE_ITER_OVERWRITE) {
5056 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5057 #ifdef CONFIG_TRACER_MAX_TRACE
5058 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5059 #endif
5060 	}
5061 
5062 	if (mask == TRACE_ITER_PRINTK) {
5063 		trace_printk_start_stop_comm(enabled);
5064 		trace_printk_control(enabled);
5065 	}
5066 
5067 	return 0;
5068 }
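/*
 * Example (illustrative): set_tracer_flag(tr, TRACE_ITER_RECORD_TGID, 1)
 * allocates the pid_max sized tgid_map on first use and then enables TGID
 * recording in the events.  Callers must hold event_mutex for this flag
 * (and for TRACE_ITER_RECORD_CMD), as the lockdep assertion above notes.
 */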
5069 
5070 int trace_set_options(struct trace_array *tr, char *option)
5071 {
5072 	char *cmp;
5073 	int neg = 0;
5074 	int ret;
5075 	size_t orig_len = strlen(option);
5076 	int len;
5077 
5078 	cmp = strstrip(option);
5079 
5080 	len = str_has_prefix(cmp, "no");
5081 	if (len)
5082 		neg = 1;
5083 
5084 	cmp += len;
5085 
5086 	mutex_lock(&event_mutex);
5087 	mutex_lock(&trace_types_lock);
5088 
5089 	ret = match_string(trace_options, -1, cmp);
5090 	/* If no option could be set, test the specific tracer options */
5091 	if (ret < 0)
5092 		ret = set_tracer_option(tr, cmp, neg);
5093 	else
5094 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5095 
5096 	mutex_unlock(&trace_types_lock);
5097 	mutex_unlock(&event_mutex);
5098 
5099 	/*
5100 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5101 	 * turn it back into a space.
5102 	 */
5103 	if (orig_len > strlen(option))
5104 		option[strlen(option)] = ' ';
5105 
5106 	return ret;
5107 }
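/*
 * Example (illustrative): trace_set_options(tr, "nooverwrite") strips the
 * "no" prefix, matches "overwrite" in trace_options[] and clears
 * TRACE_ITER_OVERWRITE, while a string that matches no generic option is
 * handed to the current tracer via set_tracer_option().
 */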
5108 
5109 static void __init apply_trace_boot_options(void)
5110 {
5111 	char *buf = trace_boot_options_buf;
5112 	char *option;
5113 
5114 	while (true) {
5115 		option = strsep(&buf, ",");
5116 
5117 		if (!option)
5118 			break;
5119 
5120 		if (*option)
5121 			trace_set_options(&global_trace, option);
5122 
5123 		/* Put back the comma to allow this to be called again */
5124 		if (buf)
5125 			*(buf - 1) = ',';
5126 	}
5127 }
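/*
 * Example (illustrative): booting with trace_options=sym-offset,nooverwrite
 * leaves "sym-offset,nooverwrite" in trace_boot_options_buf, and the loop
 * above applies each comma separated option to the global trace array.
 */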
5128 
5129 static ssize_t
5130 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5131 			size_t cnt, loff_t *ppos)
5132 {
5133 	struct seq_file *m = filp->private_data;
5134 	struct trace_array *tr = m->private;
5135 	char buf[64];
5136 	int ret;
5137 
5138 	if (cnt >= sizeof(buf))
5139 		return -EINVAL;
5140 
5141 	if (copy_from_user(buf, ubuf, cnt))
5142 		return -EFAULT;
5143 
5144 	buf[cnt] = 0;
5145 
5146 	ret = trace_set_options(tr, buf);
5147 	if (ret < 0)
5148 		return ret;
5149 
5150 	*ppos += cnt;
5151 
5152 	return cnt;
5153 }
5154 
5155 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5156 {
5157 	struct trace_array *tr = inode->i_private;
5158 	int ret;
5159 
5160 	ret = tracing_check_open_get_tr(tr);
5161 	if (ret)
5162 		return ret;
5163 
5164 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5165 	if (ret < 0)
5166 		trace_array_put(tr);
5167 
5168 	return ret;
5169 }
5170 
5171 static const struct file_operations tracing_iter_fops = {
5172 	.open		= tracing_trace_options_open,
5173 	.read		= seq_read,
5174 	.llseek		= seq_lseek,
5175 	.release	= tracing_single_release_tr,
5176 	.write		= tracing_trace_options_write,
5177 };
5178 
5179 static const char readme_msg[] =
5180 	"tracing mini-HOWTO:\n\n"
5181 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5182 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5183 	" Important files:\n"
5184 	"  trace\t\t\t- The static contents of the buffer\n"
5185 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5186 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5187 	"  current_tracer\t- function and latency tracers\n"
5188 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5189 	"  error_log\t- error log for failed commands (that support it)\n"
5190 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5191 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5192 	"  trace_clock\t\t- change the clock used to order events\n"
5193 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5194 	"      global:   Synced across CPUs but slows tracing down.\n"
5195 	"     counter:   Not a clock, but just an increment\n"
5196 	"      uptime:   Jiffy counter from time of boot\n"
5197 	"        perf:   Same clock that perf events use\n"
5198 #ifdef CONFIG_X86_64
5199 	"     x86-tsc:   TSC cycle counter\n"
5200 #endif
5201 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5202 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5203 	"    absolute:   Absolute (standalone) timestamp\n"
5204 	"\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
5205 	"\n  trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
5206 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5207 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5208 	"\t\t\t  Remove sub-buffer with rmdir\n"
5209 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5210 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5211 	"\t\t\t  option name\n"
5212 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5213 #ifdef CONFIG_DYNAMIC_FTRACE
5214 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5215 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5216 	"\t\t\t  functions\n"
5217 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5218 	"\t     modules: Can select a group via module\n"
5219 	"\t      Format: :mod:<module-name>\n"
5220 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5221 	"\t    triggers: a command to perform when function is hit\n"
5222 	"\t      Format: <function>:<trigger>[:count]\n"
5223 	"\t     trigger: traceon, traceoff\n"
5224 	"\t\t      enable_event:<system>:<event>\n"
5225 	"\t\t      disable_event:<system>:<event>\n"
5226 #ifdef CONFIG_STACKTRACE
5227 	"\t\t      stacktrace\n"
5228 #endif
5229 #ifdef CONFIG_TRACER_SNAPSHOT
5230 	"\t\t      snapshot\n"
5231 #endif
5232 	"\t\t      dump\n"
5233 	"\t\t      cpudump\n"
5234 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5235 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5236 	"\t     The first one will disable tracing every time do_fault is hit\n"
5237 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5238 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5239 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5240 	"\t       the counter will not decrement. It only decrements when the\n"
5241 	"\t       trigger did work\n"
5242 	"\t     To remove trigger without count:\n"
5243 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5244 	"\t     To remove trigger with a count:\n"
5245 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5246 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5247 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5248 	"\t    modules: Can select a group via module command :mod:\n"
5249 	"\t    Does not accept triggers\n"
5250 #endif /* CONFIG_DYNAMIC_FTRACE */
5251 #ifdef CONFIG_FUNCTION_TRACER
5252 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5253 	"\t\t    (function)\n"
5254 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5255 	"\t\t    (function)\n"
5256 #endif
5257 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5258 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5259 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5260 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5261 #endif
5262 #ifdef CONFIG_TRACER_SNAPSHOT
5263 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5264 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5265 	"\t\t\t  information\n"
5266 #endif
5267 #ifdef CONFIG_STACK_TRACER
5268 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5269 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5270 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5271 	"\t\t\t  new trace)\n"
5272 #ifdef CONFIG_DYNAMIC_FTRACE
5273 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5274 	"\t\t\t  traces\n"
5275 #endif
5276 #endif /* CONFIG_STACK_TRACER */
5277 #ifdef CONFIG_DYNAMIC_EVENTS
5278 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5279 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5280 #endif
5281 #ifdef CONFIG_KPROBE_EVENTS
5282 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5283 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5284 #endif
5285 #ifdef CONFIG_UPROBE_EVENTS
5286 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5287 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5288 #endif
5289 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5290 	"\t  accepts: event-definitions (one definition per line)\n"
5291 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5292 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5293 #ifdef CONFIG_HIST_TRIGGERS
5294 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5295 #endif
5296 	"\t           -:[<group>/]<event>\n"
5297 #ifdef CONFIG_KPROBE_EVENTS
5298 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5299 	"\t    place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5300 #endif
5301 #ifdef CONFIG_UPROBE_EVENTS
5302 	"\t    place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5303 #endif
5304 	"\t     args: <name>=fetcharg[:type]\n"
5305 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5306 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5307 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5308 #else
5309 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5310 #endif
5311 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5312 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5313 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5314 	"\t           <type>\\[<array-size>\\]\n"
5315 #ifdef CONFIG_HIST_TRIGGERS
5316 	"\t    field: <stype> <name>;\n"
5317 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5318 	"\t           [unsigned] char/int/long\n"
5319 #endif
5320 #endif
5321 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5322 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5323 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5324 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5325 	"\t\t\t  events\n"
5326 	"      filter\t\t- If set, only events passing filter are traced\n"
5327 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5328 	"\t\t\t  <event>:\n"
5329 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5330 	"      filter\t\t- If set, only events passing filter are traced\n"
5331 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5332 	"\t    Format: <trigger>[:count][if <filter>]\n"
5333 	"\t   trigger: traceon, traceoff\n"
5334 	"\t            enable_event:<system>:<event>\n"
5335 	"\t            disable_event:<system>:<event>\n"
5336 #ifdef CONFIG_HIST_TRIGGERS
5337 	"\t            enable_hist:<system>:<event>\n"
5338 	"\t            disable_hist:<system>:<event>\n"
5339 #endif
5340 #ifdef CONFIG_STACKTRACE
5341 	"\t\t    stacktrace\n"
5342 #endif
5343 #ifdef CONFIG_TRACER_SNAPSHOT
5344 	"\t\t    snapshot\n"
5345 #endif
5346 #ifdef CONFIG_HIST_TRIGGERS
5347 	"\t\t    hist (see below)\n"
5348 #endif
5349 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5350 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5351 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5352 	"\t                  events/block/block_unplug/trigger\n"
5353 	"\t   The first disables tracing every time block_unplug is hit.\n"
5354 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5355 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5356 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5357 	"\t   Like function triggers, the counter is only decremented if it\n"
5358 	"\t    enabled or disabled tracing.\n"
5359 	"\t   To remove a trigger without a count:\n"
5360 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5361 	"\t   To remove a trigger with a count:\n"
5362 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5363 	"\t   Filters can be ignored when removing a trigger.\n"
5364 #ifdef CONFIG_HIST_TRIGGERS
5365 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5366 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5367 	"\t            [:values=<field1[,field2,...]>]\n"
5368 	"\t            [:sort=<field1[,field2,...]>]\n"
5369 	"\t            [:size=#entries]\n"
5370 	"\t            [:pause][:continue][:clear]\n"
5371 	"\t            [:name=histname1]\n"
5372 	"\t            [:<handler>.<action>]\n"
5373 	"\t            [if <filter>]\n\n"
5374 	"\t    Note, special fields can be used as well:\n"
5375 	"\t            common_timestamp - to record current timestamp\n"
5376 	"\t            common_cpu - to record the CPU the event happened on\n"
5377 	"\n"
5378 	"\t    When a matching event is hit, an entry is added to a hash\n"
5379 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5380 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5381 	"\t    correspond to fields in the event's format description.  Keys\n"
5382 	"\t    can be any field, or the special string 'stacktrace'.\n"
5383 	"\t    Compound keys consisting of up to two fields can be specified\n"
5384 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5385 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5386 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5387 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5388 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5389 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5390 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5391 	"\t    its histogram data will be shared with other triggers of the\n"
5392 	"\t    same name, and trigger hits will update this common data.\n\n"
5393 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5394 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5395 	"\t    triggers attached to an event, there will be a table for each\n"
5396 	"\t    trigger in the output.  The table displayed for a named\n"
5397 	"\t    trigger will be the same as any other instance having the\n"
5398 	"\t    same name.  The default format used to display a given field\n"
5399 	"\t    can be modified by appending any of the following modifiers\n"
5400 	"\t    to the field name, as applicable:\n\n"
5401 	"\t            .hex        display a number as a hex value\n"
5402 	"\t            .sym        display an address as a symbol\n"
5403 	"\t            .sym-offset display an address as a symbol and offset\n"
5404 	"\t            .execname   display a common_pid as a program name\n"
5405 	"\t            .syscall    display a syscall id as a syscall name\n"
5406 	"\t            .log2       display log2 value rather than raw number\n"
5407 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5408 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5409 	"\t    trigger or to start a hist trigger but not log any events\n"
5410 	"\t    until told to do so.  'continue' can be used to start or\n"
5411 	"\t    restart a paused hist trigger.\n\n"
5412 	"\t    The 'clear' parameter will clear the contents of a running\n"
5413 	"\t    hist trigger and leave its current paused/active state\n"
5414 	"\t    unchanged.\n\n"
5415 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5416 	"\t    have one event conditionally start and stop another event's\n"
5417 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5418 	"\t    the enable_event and disable_event triggers.\n\n"
5419 	"\t    Hist trigger handlers and actions are executed whenever\n"
5420 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5421 	"\t        <handler>.<action>\n\n"
5422 	"\t    The available handlers are:\n\n"
5423 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5424 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5425 	"\t        onchange(var)            - invoke action if var changes\n\n"
5426 	"\t    The available actions are:\n\n"
5427 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5428 	"\t        save(field,...)                      - save current event fields\n"
5429 #ifdef CONFIG_TRACER_SNAPSHOT
5430 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5431 #endif
5432 #ifdef CONFIG_SYNTH_EVENTS
5433 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5434 	"\t  Write into this file to define/undefine new synthetic events.\n"
5435 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5436 #endif
5437 #endif
5438 ;
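
/*
 * The README text above describes the dynamic-event and hist-trigger
 * syntax without a worked example for every case.  As an illustration
 * (paths assume tracefs is mounted at /sys/kernel/tracing; the probed
 * symbol and the event fields are examples only):
 *
 *	# kprobe event on do_sys_open, placed in the default "kprobes" group
 *	echo 'p:myprobe do_sys_open' >> kprobe_events
 *	echo 1 > events/kprobes/myprobe/enable
 *
 *	# histogram of requested kmalloc bytes, keyed by the calling task
 *	echo 'hist:keys=common_pid.execname:vals=bytes_req' \
 *		> events/kmem/kmalloc/trigger
 *	cat events/kmem/kmalloc/hist
 */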
5439 
5440 static ssize_t
5441 tracing_readme_read(struct file *filp, char __user *ubuf,
5442 		       size_t cnt, loff_t *ppos)
5443 {
5444 	return simple_read_from_buffer(ubuf, cnt, ppos,
5445 					readme_msg, strlen(readme_msg));
5446 }
5447 
5448 static const struct file_operations tracing_readme_fops = {
5449 	.open		= tracing_open_generic,
5450 	.read		= tracing_readme_read,
5451 	.llseek		= generic_file_llseek,
5452 };
5453 
5454 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5455 {
5456 	int pid = ++(*pos);
5457 
5458 	return trace_find_tgid_ptr(pid);
5459 }
5460 
5461 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5462 {
5463 	int pid = *pos;
5464 
5465 	return trace_find_tgid_ptr(pid);
5466 }
5467 
5468 static void saved_tgids_stop(struct seq_file *m, void *v)
5469 {
5470 }
5471 
5472 static int saved_tgids_show(struct seq_file *m, void *v)
5473 {
5474 	int *entry = (int *)v;
5475 	int pid = entry - tgid_map;
5476 	int tgid = *entry;
5477 
5478 	if (tgid == 0)
5479 		return SEQ_SKIP;
5480 
5481 	seq_printf(m, "%d %d\n", pid, tgid);
5482 	return 0;
5483 }
5484 
5485 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5486 	.start		= saved_tgids_start,
5487 	.stop		= saved_tgids_stop,
5488 	.next		= saved_tgids_next,
5489 	.show		= saved_tgids_show,
5490 };
5491 
5492 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5493 {
5494 	int ret;
5495 
5496 	ret = tracing_check_open_get_tr(NULL);
5497 	if (ret)
5498 		return ret;
5499 
5500 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5501 }
5502 
5503 
5504 static const struct file_operations tracing_saved_tgids_fops = {
5505 	.open		= tracing_saved_tgids_open,
5506 	.read		= seq_read,
5507 	.llseek		= seq_lseek,
5508 	.release	= seq_release,
5509 };
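
/*
 * Example (illustrative): with the "record-tgid" trace option set, the
 * saved_tgids file maps the PIDs seen in the trace to their thread group
 * ids, one "PID TGID" pair per line as printed by saved_tgids_show():
 *
 *	echo 1 > options/record-tgid
 *	cat saved_tgids
 */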
5510 
5511 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5512 {
5513 	unsigned int *ptr = v;
5514 
5515 	if (*pos || m->count)
5516 		ptr++;
5517 
5518 	(*pos)++;
5519 
5520 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5521 	     ptr++) {
5522 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5523 			continue;
5524 
5525 		return ptr;
5526 	}
5527 
5528 	return NULL;
5529 }
5530 
5531 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5532 {
5533 	void *v;
5534 	loff_t l = 0;
5535 
5536 	preempt_disable();
5537 	arch_spin_lock(&trace_cmdline_lock);
5538 
5539 	v = &savedcmd->map_cmdline_to_pid[0];
5540 	while (l <= *pos) {
5541 		v = saved_cmdlines_next(m, v, &l);
5542 		if (!v)
5543 			return NULL;
5544 	}
5545 
5546 	return v;
5547 }
5548 
5549 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5550 {
5551 	arch_spin_unlock(&trace_cmdline_lock);
5552 	preempt_enable();
5553 }
5554 
5555 static int saved_cmdlines_show(struct seq_file *m, void *v)
5556 {
5557 	char buf[TASK_COMM_LEN];
5558 	unsigned int *pid = v;
5559 
5560 	__trace_find_cmdline(*pid, buf);
5561 	seq_printf(m, "%d %s\n", *pid, buf);
5562 	return 0;
5563 }
5564 
5565 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5566 	.start		= saved_cmdlines_start,
5567 	.next		= saved_cmdlines_next,
5568 	.stop		= saved_cmdlines_stop,
5569 	.show		= saved_cmdlines_show,
5570 };
5571 
5572 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5573 {
5574 	int ret;
5575 
5576 	ret = tracing_check_open_get_tr(NULL);
5577 	if (ret)
5578 		return ret;
5579 
5580 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5581 }
5582 
5583 static const struct file_operations tracing_saved_cmdlines_fops = {
5584 	.open		= tracing_saved_cmdlines_open,
5585 	.read		= seq_read,
5586 	.llseek		= seq_lseek,
5587 	.release	= seq_release,
5588 };
5589 
5590 static ssize_t
5591 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5592 				 size_t cnt, loff_t *ppos)
5593 {
5594 	char buf[64];
5595 	int r;
5596 
5597 	preempt_disable();
5598 	arch_spin_lock(&trace_cmdline_lock);
5599 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5600 	arch_spin_unlock(&trace_cmdline_lock);
5601 	preempt_enable();
5602 
5603 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5604 }
5605 
5606 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5607 {
5608 	kfree(s->saved_cmdlines);
5609 	kfree(s->map_cmdline_to_pid);
5610 	kfree(s);
5611 }
5612 
5613 static int tracing_resize_saved_cmdlines(unsigned int val)
5614 {
5615 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5616 
5617 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5618 	if (!s)
5619 		return -ENOMEM;
5620 
5621 	if (allocate_cmdlines_buffer(val, s) < 0) {
5622 		kfree(s);
5623 		return -ENOMEM;
5624 	}
5625 
5626 	preempt_disable();
5627 	arch_spin_lock(&trace_cmdline_lock);
5628 	savedcmd_temp = savedcmd;
5629 	savedcmd = s;
5630 	arch_spin_unlock(&trace_cmdline_lock);
5631 	preempt_enable();
5632 	free_saved_cmdlines_buffer(savedcmd_temp);
5633 
5634 	return 0;
5635 }
5636 
5637 static ssize_t
5638 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5639 				  size_t cnt, loff_t *ppos)
5640 {
5641 	unsigned long val;
5642 	int ret;
5643 
5644 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5645 	if (ret)
5646 		return ret;
5647 
5648 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5649 	if (!val || val > PID_MAX_DEFAULT)
5650 		return -EINVAL;
5651 
5652 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5653 	if (ret < 0)
5654 		return ret;
5655 
5656 	*ppos += cnt;
5657 
5658 	return cnt;
5659 }
5660 
5661 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5662 	.open		= tracing_open_generic,
5663 	.read		= tracing_saved_cmdlines_size_read,
5664 	.write		= tracing_saved_cmdlines_size_write,
5665 };
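
/*
 * Example (illustrative): the saved_cmdlines_size file reports how many
 * pid->comm entries are cached and accepts a new size, bounded by
 * PID_MAX_DEFAULT as checked in the write handler above:
 *
 *	cat saved_cmdlines_size
 *	echo 1024 > saved_cmdlines_size
 */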
5666 
5667 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5668 static union trace_eval_map_item *
5669 update_eval_map(union trace_eval_map_item *ptr)
5670 {
5671 	if (!ptr->map.eval_string) {
5672 		if (ptr->tail.next) {
5673 			ptr = ptr->tail.next;
5674 			/* Set ptr to the next real item (skip head) */
5675 			ptr++;
5676 		} else
5677 			return NULL;
5678 	}
5679 	return ptr;
5680 }
5681 
5682 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5683 {
5684 	union trace_eval_map_item *ptr = v;
5685 
5686 	/*
5687 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5688 	 * This really should never happen.
5689 	 */
5690 	(*pos)++;
5691 	ptr = update_eval_map(ptr);
5692 	if (WARN_ON_ONCE(!ptr))
5693 		return NULL;
5694 
5695 	ptr++;
5696 	ptr = update_eval_map(ptr);
5697 
5698 	return ptr;
5699 }
5700 
5701 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5702 {
5703 	union trace_eval_map_item *v;
5704 	loff_t l = 0;
5705 
5706 	mutex_lock(&trace_eval_mutex);
5707 
5708 	v = trace_eval_maps;
5709 	if (v)
5710 		v++;
5711 
5712 	while (v && l < *pos) {
5713 		v = eval_map_next(m, v, &l);
5714 	}
5715 
5716 	return v;
5717 }
5718 
5719 static void eval_map_stop(struct seq_file *m, void *v)
5720 {
5721 	mutex_unlock(&trace_eval_mutex);
5722 }
5723 
5724 static int eval_map_show(struct seq_file *m, void *v)
5725 {
5726 	union trace_eval_map_item *ptr = v;
5727 
5728 	seq_printf(m, "%s %ld (%s)\n",
5729 		   ptr->map.eval_string, ptr->map.eval_value,
5730 		   ptr->map.system);
5731 
5732 	return 0;
5733 }
5734 
5735 static const struct seq_operations tracing_eval_map_seq_ops = {
5736 	.start		= eval_map_start,
5737 	.next		= eval_map_next,
5738 	.stop		= eval_map_stop,
5739 	.show		= eval_map_show,
5740 };
5741 
5742 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5743 {
5744 	int ret;
5745 
5746 	ret = tracing_check_open_get_tr(NULL);
5747 	if (ret)
5748 		return ret;
5749 
5750 	return seq_open(filp, &tracing_eval_map_seq_ops);
5751 }
5752 
5753 static const struct file_operations tracing_eval_map_fops = {
5754 	.open		= tracing_eval_map_open,
5755 	.read		= seq_read,
5756 	.llseek		= seq_lseek,
5757 	.release	= seq_release,
5758 };
5759 
5760 static inline union trace_eval_map_item *
5761 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5762 {
5763 	/* Return tail of array given the head */
5764 	return ptr + ptr->head.length + 1;
5765 }
5766 
5767 static void
5768 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5769 			   int len)
5770 {
5771 	struct trace_eval_map **stop;
5772 	struct trace_eval_map **map;
5773 	union trace_eval_map_item *map_array;
5774 	union trace_eval_map_item *ptr;
5775 
5776 	stop = start + len;
5777 
5778 	/*
5779 	 * The trace_eval_maps contains the map plus a head and tail item,
5780 	 * where the head holds the module and length of array, and the
5781 	 * tail holds a pointer to the next list.
5782 	 */
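	/*
	 * For example, with len == 2 the allocated array is laid out as
	 * (illustrative):
	 *
	 *	map_array[0]	head: { .mod = mod, .length = 2 }
	 *	map_array[1]	map:  copy of *start[0]
	 *	map_array[2]	map:  copy of *start[1]
	 *	map_array[3]	tail: zeroed; .tail.next links to the next array
	 */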
5783 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5784 	if (!map_array) {
5785 		pr_warn("Unable to allocate trace eval mapping\n");
5786 		return;
5787 	}
5788 
5789 	mutex_lock(&trace_eval_mutex);
5790 
5791 	if (!trace_eval_maps)
5792 		trace_eval_maps = map_array;
5793 	else {
5794 		ptr = trace_eval_maps;
5795 		for (;;) {
5796 			ptr = trace_eval_jmp_to_tail(ptr);
5797 			if (!ptr->tail.next)
5798 				break;
5799 			ptr = ptr->tail.next;
5800 
5801 		}
5802 		ptr->tail.next = map_array;
5803 	}
5804 	map_array->head.mod = mod;
5805 	map_array->head.length = len;
5806 	map_array++;
5807 
5808 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5809 		map_array->map = **map;
5810 		map_array++;
5811 	}
5812 	memset(map_array, 0, sizeof(*map_array));
5813 
5814 	mutex_unlock(&trace_eval_mutex);
5815 }
5816 
5817 static void trace_create_eval_file(struct dentry *d_tracer)
5818 {
5819 	trace_create_file("eval_map", 0444, d_tracer,
5820 			  NULL, &tracing_eval_map_fops);
5821 }
5822 
5823 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5824 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5825 static inline void trace_insert_eval_map_file(struct module *mod,
5826 			      struct trace_eval_map **start, int len) { }
5827 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5828 
5829 static void trace_insert_eval_map(struct module *mod,
5830 				  struct trace_eval_map **start, int len)
5831 {
5832 	struct trace_eval_map **map;
5833 
5834 	if (len <= 0)
5835 		return;
5836 
5837 	map = start;
5838 
5839 	trace_event_eval_update(map, len);
5840 
5841 	trace_insert_eval_map_file(mod, start, len);
5842 }
5843 
5844 static ssize_t
5845 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5846 		       size_t cnt, loff_t *ppos)
5847 {
5848 	struct trace_array *tr = filp->private_data;
5849 	char buf[MAX_TRACER_SIZE+2];
5850 	int r;
5851 
5852 	mutex_lock(&trace_types_lock);
5853 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5854 	mutex_unlock(&trace_types_lock);
5855 
5856 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5857 }
5858 
5859 int tracer_init(struct tracer *t, struct trace_array *tr)
5860 {
5861 	tracing_reset_online_cpus(&tr->array_buffer);
5862 	return t->init(tr);
5863 }
5864 
5865 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5866 {
5867 	int cpu;
5868 
5869 	for_each_tracing_cpu(cpu)
5870 		per_cpu_ptr(buf->data, cpu)->entries = val;
5871 }
5872 
5873 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5874 {
5875 	if (cpu == RING_BUFFER_ALL_CPUS) {
5876 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5877 	} else {
5878 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5879 	}
5880 }
5881 
5882 #ifdef CONFIG_TRACER_MAX_TRACE
5883 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5884 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5885 					struct array_buffer *size_buf, int cpu_id)
5886 {
5887 	int cpu, ret = 0;
5888 
5889 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5890 		for_each_tracing_cpu(cpu) {
5891 			ret = ring_buffer_resize(trace_buf->buffer,
5892 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5893 			if (ret < 0)
5894 				break;
5895 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5896 				per_cpu_ptr(size_buf->data, cpu)->entries;
5897 		}
5898 	} else {
5899 		ret = ring_buffer_resize(trace_buf->buffer,
5900 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5901 		if (ret == 0)
5902 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5903 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5904 	}
5905 
5906 	return ret;
5907 }
5908 #endif /* CONFIG_TRACER_MAX_TRACE */
5909 
5910 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5911 					unsigned long size, int cpu)
5912 {
5913 	int ret;
5914 
5915 	/*
5916 	 * If kernel or user changes the size of the ring buffer
5917 	 * we use the size that was given, and we can forget about
5918 	 * expanding it later.
5919 	 */
5920 	ring_buffer_expanded = true;
5921 
5922 	/* May be called before buffers are initialized */
5923 	if (!tr->array_buffer.buffer)
5924 		return 0;
5925 
5926 	/* Do not allow tracing while resizing ring buffer */
5927 	tracing_stop_tr(tr);
5928 
5929 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5930 	if (ret < 0)
5931 		goto out_start;
5932 
5933 #ifdef CONFIG_TRACER_MAX_TRACE
5934 	if (!tr->allocated_snapshot)
5935 		goto out;
5936 
5937 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5938 	if (ret < 0) {
5939 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5940 						     &tr->array_buffer, cpu);
5941 		if (r < 0) {
5942 			/*
5943 			 * AARGH! We are left with different
5944 			 * size max buffer!!!!
5945 			 * The max buffer is our "snapshot" buffer.
5946 			 * When a tracer needs a snapshot (one of the
5947 			 * latency tracers), it swaps the max buffer
5948 			 * with the saved snapshot. We succeeded in
5949 			 * updating the size of the main buffer, but failed to
5950 			 * update the size of the max buffer. But when we tried
5951 			 * to reset the main buffer to the original size, we
5952 			 * failed there too. This is very unlikely to
5953 			 * happen, but if it does, warn and kill all
5954 			 * tracing.
5955 			 */
5956 			WARN_ON(1);
5957 			tracing_disabled = 1;
5958 		}
5959 		goto out_start;
5960 	}
5961 
5962 	update_buffer_entries(&tr->max_buffer, cpu);
5963 
5964  out:
5965 #endif /* CONFIG_TRACER_MAX_TRACE */
5966 
5967 	update_buffer_entries(&tr->array_buffer, cpu);
5968  out_start:
5969 	tracing_start_tr(tr);
5970 	return ret;
5971 }
5972 
5973 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5974 				  unsigned long size, int cpu_id)
5975 {
5976 	int ret = size;
5977 
5978 	mutex_lock(&trace_types_lock);
5979 
5980 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5981 		/* make sure this CPU is enabled in the mask */
5982 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5983 			ret = -EINVAL;
5984 			goto out;
5985 		}
5986 	}
5987 
5988 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5989 	if (ret < 0)
5990 		ret = -ENOMEM;
5991 
5992 out:
5993 	mutex_unlock(&trace_types_lock);
5994 
5995 	return ret;
5996 }
5997 
5998 
5999 /**
6000  * tracing_update_buffers - used by tracing facility to expand ring buffers
6001  *
6002  * To save memory when tracing is never used on a system that has it
6003  * configured in, the ring buffers are set to a minimum size. Once a
6004  * user starts to use the tracing facility, they need to grow to their
6005  * default size.
6006  *
6007  * This function is to be called when a tracer is about to be used.
6008  */
6009 int tracing_update_buffers(void)
6010 {
6011 	int ret = 0;
6012 
6013 	mutex_lock(&trace_types_lock);
6014 	if (!ring_buffer_expanded)
6015 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6016 						RING_BUFFER_ALL_CPUS);
6017 	mutex_unlock(&trace_types_lock);
6018 
6019 	return ret;
6020 }
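
/*
 * A typical caller pattern (illustrative) is to expand the buffers right
 * before enabling a tracer or event and to bail out on failure:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */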
6021 
6022 struct trace_option_dentry;
6023 
6024 static void
6025 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6026 
6027 /*
6028  * Used to clear out the tracer before deletion of an instance.
6029  * Must have trace_types_lock held.
6030  */
6031 static void tracing_set_nop(struct trace_array *tr)
6032 {
6033 	if (tr->current_trace == &nop_trace)
6034 		return;
6035 
6036 	tr->current_trace->enabled--;
6037 
6038 	if (tr->current_trace->reset)
6039 		tr->current_trace->reset(tr);
6040 
6041 	tr->current_trace = &nop_trace;
6042 }
6043 
6044 static bool tracer_options_updated;
6045 
6046 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6047 {
6048 	/* Only enable if the directory has been created already. */
6049 	if (!tr->dir)
6050 		return;
6051 
6052 	/* Only create trace option files after update_tracer_options finishes */
6053 	if (!tracer_options_updated)
6054 		return;
6055 
6056 	create_trace_option_files(tr, t);
6057 }
6058 
6059 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6060 {
6061 	struct tracer *t;
6062 #ifdef CONFIG_TRACER_MAX_TRACE
6063 	bool had_max_tr;
6064 #endif
6065 	int ret = 0;
6066 
6067 	mutex_lock(&trace_types_lock);
6068 
6069 	if (!ring_buffer_expanded) {
6070 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6071 						RING_BUFFER_ALL_CPUS);
6072 		if (ret < 0)
6073 			goto out;
6074 		ret = 0;
6075 	}
6076 
6077 	for (t = trace_types; t; t = t->next) {
6078 		if (strcmp(t->name, buf) == 0)
6079 			break;
6080 	}
6081 	if (!t) {
6082 		ret = -EINVAL;
6083 		goto out;
6084 	}
6085 	if (t == tr->current_trace)
6086 		goto out;
6087 
6088 #ifdef CONFIG_TRACER_SNAPSHOT
6089 	if (t->use_max_tr) {
6090 		local_irq_disable();
6091 		arch_spin_lock(&tr->max_lock);
6092 		if (tr->cond_snapshot)
6093 			ret = -EBUSY;
6094 		arch_spin_unlock(&tr->max_lock);
6095 		local_irq_enable();
6096 		if (ret)
6097 			goto out;
6098 	}
6099 #endif
6100 	/* Some tracers won't work on kernel command line */
6101 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6102 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6103 			t->name);
6104 		goto out;
6105 	}
6106 
6107 	/* Some tracers are only allowed for the top level buffer */
6108 	if (!trace_ok_for_array(t, tr)) {
6109 		ret = -EINVAL;
6110 		goto out;
6111 	}
6112 
6113 	/* If trace pipe files are being read, we can't change the tracer */
6114 	if (tr->trace_ref) {
6115 		ret = -EBUSY;
6116 		goto out;
6117 	}
6118 
6119 	trace_branch_disable();
6120 
6121 	tr->current_trace->enabled--;
6122 
6123 	if (tr->current_trace->reset)
6124 		tr->current_trace->reset(tr);
6125 
6126 #ifdef CONFIG_TRACER_MAX_TRACE
6127 	had_max_tr = tr->current_trace->use_max_tr;
6128 
6129 	/* Current trace needs to be nop_trace before synchronize_rcu */
6130 	tr->current_trace = &nop_trace;
6131 
6132 	if (had_max_tr && !t->use_max_tr) {
6133 		/*
6134 		 * We need to make sure that the update_max_tr sees that
6135 		 * current_trace changed to nop_trace to keep it from
6136 		 * swapping the buffers after we resize it.
6137 		 * update_max_tr() is called with interrupts disabled,
6138 		 * so a synchronize_rcu() is sufficient.
6139 		 */
6140 		synchronize_rcu();
6141 		free_snapshot(tr);
6142 	}
6143 
6144 	if (t->use_max_tr && !tr->allocated_snapshot) {
6145 		ret = tracing_alloc_snapshot_instance(tr);
6146 		if (ret < 0)
6147 			goto out;
6148 	}
6149 #else
6150 	tr->current_trace = &nop_trace;
6151 #endif
6152 
6153 	if (t->init) {
6154 		ret = tracer_init(t, tr);
6155 		if (ret)
6156 			goto out;
6157 	}
6158 
6159 	tr->current_trace = t;
6160 	tr->current_trace->enabled++;
6161 	trace_branch_enable(tr);
6162  out:
6163 	mutex_unlock(&trace_types_lock);
6164 
6165 	return ret;
6166 }
6167 
6168 static ssize_t
6169 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6170 			size_t cnt, loff_t *ppos)
6171 {
6172 	struct trace_array *tr = filp->private_data;
6173 	char buf[MAX_TRACER_SIZE+1];
6174 	int i;
6175 	size_t ret;
6176 	int err;
6177 
6178 	ret = cnt;
6179 
6180 	if (cnt > MAX_TRACER_SIZE)
6181 		cnt = MAX_TRACER_SIZE;
6182 
6183 	if (copy_from_user(buf, ubuf, cnt))
6184 		return -EFAULT;
6185 
6186 	buf[cnt] = 0;
6187 
6188 	/* strip ending whitespace. */
6189 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6190 		buf[i] = 0;
6191 
6192 	err = tracing_set_tracer(tr, buf);
6193 	if (err)
6194 		return err;
6195 
6196 	*ppos += ret;
6197 
6198 	return ret;
6199 }
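
/*
 * Example (illustrative): switching tracers from user space goes through
 * the handlers above via the current_tracer file:
 *
 *	echo function > current_tracer
 *	cat current_tracer
 *	echo nop > current_tracer
 */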
6200 
6201 static ssize_t
6202 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6203 		   size_t cnt, loff_t *ppos)
6204 {
6205 	char buf[64];
6206 	int r;
6207 
6208 	r = snprintf(buf, sizeof(buf), "%ld\n",
6209 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6210 	if (r > sizeof(buf))
6211 		r = sizeof(buf);
6212 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6213 }
6214 
6215 static ssize_t
6216 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6217 		    size_t cnt, loff_t *ppos)
6218 {
6219 	unsigned long val;
6220 	int ret;
6221 
6222 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6223 	if (ret)
6224 		return ret;
6225 
6226 	*ptr = val * 1000;
6227 
6228 	return cnt;
6229 }
6230 
6231 static ssize_t
6232 tracing_thresh_read(struct file *filp, char __user *ubuf,
6233 		    size_t cnt, loff_t *ppos)
6234 {
6235 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6236 }
6237 
6238 static ssize_t
6239 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6240 		     size_t cnt, loff_t *ppos)
6241 {
6242 	struct trace_array *tr = filp->private_data;
6243 	int ret;
6244 
6245 	mutex_lock(&trace_types_lock);
6246 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6247 	if (ret < 0)
6248 		goto out;
6249 
6250 	if (tr->current_trace->update_thresh) {
6251 		ret = tr->current_trace->update_thresh(tr);
6252 		if (ret < 0)
6253 			goto out;
6254 	}
6255 
6256 	ret = cnt;
6257 out:
6258 	mutex_unlock(&trace_types_lock);
6259 
6260 	return ret;
6261 }
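
/*
 * Example (illustrative): tracing_thresh is read and written in
 * microseconds; tracing_nsecs_write() multiplies by 1000 to get the
 * internal nanosecond value:
 *
 *	echo 100 > tracing_thresh
 *	cat tracing_thresh
 */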
6262 
6263 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6264 
6265 static ssize_t
6266 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6267 		     size_t cnt, loff_t *ppos)
6268 {
6269 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6270 }
6271 
6272 static ssize_t
6273 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6274 		      size_t cnt, loff_t *ppos)
6275 {
6276 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6277 }
6278 
6279 #endif
6280 
6281 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6282 {
6283 	if (cpu == RING_BUFFER_ALL_CPUS) {
6284 		if (cpumask_empty(tr->pipe_cpumask)) {
6285 			cpumask_setall(tr->pipe_cpumask);
6286 			return 0;
6287 		}
6288 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6289 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6290 		return 0;
6291 	}
6292 	return -EBUSY;
6293 }
6294 
6295 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6296 {
6297 	if (cpu == RING_BUFFER_ALL_CPUS) {
6298 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6299 		cpumask_clear(tr->pipe_cpumask);
6300 	} else {
6301 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6302 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6303 	}
6304 }
6305 
6306 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6307 {
6308 	struct trace_array *tr = inode->i_private;
6309 	struct trace_iterator *iter;
6310 	int cpu;
6311 	int ret;
6312 
6313 	ret = tracing_check_open_get_tr(tr);
6314 	if (ret)
6315 		return ret;
6316 
6317 	mutex_lock(&trace_types_lock);
6318 	cpu = tracing_get_cpu(inode);
6319 	ret = open_pipe_on_cpu(tr, cpu);
6320 	if (ret)
6321 		goto fail_pipe_on_cpu;
6322 
6323 	/* create a buffer to store the information to pass to userspace */
6324 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6325 	if (!iter) {
6326 		ret = -ENOMEM;
6327 		goto fail_alloc_iter;
6328 	}
6329 
6330 	trace_seq_init(&iter->seq);
6331 	iter->trace = tr->current_trace;
6332 
6333 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6334 		ret = -ENOMEM;
6335 		goto fail;
6336 	}
6337 
6338 	/* trace pipe does not show start of buffer */
6339 	cpumask_setall(iter->started);
6340 
6341 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6342 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6343 
6344 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6345 	if (trace_clocks[tr->clock_id].in_ns)
6346 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6347 
6348 	iter->tr = tr;
6349 	iter->array_buffer = &tr->array_buffer;
6350 	iter->cpu_file = cpu;
6351 	mutex_init(&iter->mutex);
6352 	filp->private_data = iter;
6353 
6354 	if (iter->trace->pipe_open)
6355 		iter->trace->pipe_open(iter);
6356 
6357 	nonseekable_open(inode, filp);
6358 
6359 	tr->trace_ref++;
6360 
6361 	mutex_unlock(&trace_types_lock);
6362 	return ret;
6363 
6364 fail:
6365 	kfree(iter);
6366 fail_alloc_iter:
6367 	close_pipe_on_cpu(tr, cpu);
6368 fail_pipe_on_cpu:
6369 	__trace_array_put(tr);
6370 	mutex_unlock(&trace_types_lock);
6371 	return ret;
6372 }
6373 
6374 static int tracing_release_pipe(struct inode *inode, struct file *file)
6375 {
6376 	struct trace_iterator *iter = file->private_data;
6377 	struct trace_array *tr = inode->i_private;
6378 
6379 	mutex_lock(&trace_types_lock);
6380 
6381 	tr->trace_ref--;
6382 
6383 	if (iter->trace->pipe_close)
6384 		iter->trace->pipe_close(iter);
6385 	close_pipe_on_cpu(tr, iter->cpu_file);
6386 	mutex_unlock(&trace_types_lock);
6387 
6388 	free_cpumask_var(iter->started);
6389 	kfree(iter->temp);
6390 	mutex_destroy(&iter->mutex);
6391 	kfree(iter);
6392 
6393 	trace_array_put(tr);
6394 
6395 	return 0;
6396 }
6397 
6398 static __poll_t
6399 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6400 {
6401 	struct trace_array *tr = iter->tr;
6402 
6403 	/* Iterators are static, they should be filled or empty */
6404 	if (trace_buffer_iter(iter, iter->cpu_file))
6405 		return EPOLLIN | EPOLLRDNORM;
6406 
6407 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6408 		/*
6409 		 * Always select as readable when in blocking mode
6410 		 */
6411 		return EPOLLIN | EPOLLRDNORM;
6412 	else
6413 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6414 					     filp, poll_table, iter->tr->buffer_percent);
6415 }
6416 
6417 static __poll_t
6418 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6419 {
6420 	struct trace_iterator *iter = filp->private_data;
6421 
6422 	return trace_poll(iter, filp, poll_table);
6423 }
6424 
6425 /* Must be called with iter->mutex held. */
6426 static int tracing_wait_pipe(struct file *filp)
6427 {
6428 	struct trace_iterator *iter = filp->private_data;
6429 	int ret;
6430 
6431 	while (trace_empty(iter)) {
6432 
6433 		if ((filp->f_flags & O_NONBLOCK)) {
6434 			return -EAGAIN;
6435 		}
6436 
6437 		/*
6438 		 * We block until we read something and tracing is disabled.
6439 		 * We still block if tracing is disabled, but we have never
6440 		 * read anything. This allows a user to cat this file, and
6441 		 * then enable tracing. But after we have read something,
6442 		 * we give an EOF when tracing is again disabled.
6443 		 *
6444 		 * iter->pos will be 0 if we haven't read anything.
6445 		 */
6446 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6447 			break;
6448 
6449 		mutex_unlock(&iter->mutex);
6450 
6451 		ret = wait_on_pipe(iter, 0);
6452 
6453 		mutex_lock(&iter->mutex);
6454 
6455 		if (ret)
6456 			return ret;
6457 	}
6458 
6459 	return 1;
6460 }
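
/*
 * Example (illustrative) of the blocking behaviour described above:
 * "cat trace_pipe" blocks while the buffer is empty, starts streaming
 * once events arrive, and returns EOF only after something has been
 * read and tracing is turned off again:
 *
 *	cat trace_pipe &
 *	echo 1 > tracing_on
 *	echo hello > trace_marker
 *	echo 0 > tracing_on
 */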
6461 
6462 /*
6463  * Consumer reader.
6464  */
6465 static ssize_t
6466 tracing_read_pipe(struct file *filp, char __user *ubuf,
6467 		  size_t cnt, loff_t *ppos)
6468 {
6469 	struct trace_iterator *iter = filp->private_data;
6470 	ssize_t sret;
6471 
6472 	/*
6473 	 * Avoid more than one consumer on a single file descriptor
6474 	 * This is just a matter of trace coherency; the ring buffer itself
6475 	 * is protected.
6476 	 */
6477 	mutex_lock(&iter->mutex);
6478 
6479 	/* return any leftover data */
6480 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6481 	if (sret != -EBUSY)
6482 		goto out;
6483 
6484 	trace_seq_init(&iter->seq);
6485 
6486 	if (iter->trace->read) {
6487 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6488 		if (sret)
6489 			goto out;
6490 	}
6491 
6492 waitagain:
6493 	sret = tracing_wait_pipe(filp);
6494 	if (sret <= 0)
6495 		goto out;
6496 
6497 	/* stop when tracing is finished */
6498 	if (trace_empty(iter)) {
6499 		sret = 0;
6500 		goto out;
6501 	}
6502 
6503 	if (cnt >= PAGE_SIZE)
6504 		cnt = PAGE_SIZE - 1;
6505 
6506 	/* reset all but tr, trace, and overruns */
6507 	memset(&iter->seq, 0,
6508 	       sizeof(struct trace_iterator) -
6509 	       offsetof(struct trace_iterator, seq));
6510 	cpumask_clear(iter->started);
6511 	trace_seq_init(&iter->seq);
6512 	iter->pos = -1;
6513 
6514 	trace_event_read_lock();
6515 	trace_access_lock(iter->cpu_file);
6516 	while (trace_find_next_entry_inc(iter) != NULL) {
6517 		enum print_line_t ret;
6518 		int save_len = iter->seq.seq.len;
6519 
6520 		ret = print_trace_line(iter);
6521 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6522 			/*
6523 			 * If one print_trace_line() fills the entire trace_seq in one shot,
6524 			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6525 			 * In this case, we need to consume it; otherwise, the loop will peek
6526 			 * at this event again next time, resulting in an infinite loop.
6527 			 */
6528 			if (save_len == 0) {
6529 				iter->seq.full = 0;
6530 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6531 				trace_consume(iter);
6532 				break;
6533 			}
6534 
6535 			/* In other cases, don't print partial lines */
6536 			iter->seq.seq.len = save_len;
6537 			break;
6538 		}
6539 		if (ret != TRACE_TYPE_NO_CONSUME)
6540 			trace_consume(iter);
6541 
6542 		if (trace_seq_used(&iter->seq) >= cnt)
6543 			break;
6544 
6545 		/*
6546 		 * Setting the full flag means we reached the trace_seq buffer
6547 		 * size and should have left via the partial-output condition above.
6548 		 * One of the trace_seq_* functions is not being used properly.
6549 		 */
6550 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6551 			  iter->ent->type);
6552 	}
6553 	trace_access_unlock(iter->cpu_file);
6554 	trace_event_read_unlock();
6555 
6556 	/* Now copy what we have to the user */
6557 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6558 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6559 		trace_seq_init(&iter->seq);
6560 
6561 	/*
6562 	 * If there was nothing to send to user, in spite of consuming trace
6563 	 * entries, go back to wait for more entries.
6564 	 */
6565 	if (sret == -EBUSY)
6566 		goto waitagain;
6567 
6568 out:
6569 	mutex_unlock(&iter->mutex);
6570 
6571 	return sret;
6572 }
6573 
6574 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6575 				     unsigned int idx)
6576 {
6577 	__free_page(spd->pages[idx]);
6578 }
6579 
6580 static size_t
6581 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6582 {
6583 	size_t count;
6584 	int save_len;
6585 	int ret;
6586 
6587 	/* Seq buffer is page-sized, exactly what we need. */
6588 	for (;;) {
6589 		save_len = iter->seq.seq.len;
6590 		ret = print_trace_line(iter);
6591 
6592 		if (trace_seq_has_overflowed(&iter->seq)) {
6593 			iter->seq.seq.len = save_len;
6594 			break;
6595 		}
6596 
6597 		/*
6598 		 * This should not be hit, because it should only
6599 		 * be set if the iter->seq overflowed. But check it
6600 		 * anyway to be safe.
6601 		 */
6602 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6603 			iter->seq.seq.len = save_len;
6604 			break;
6605 		}
6606 
6607 		count = trace_seq_used(&iter->seq) - save_len;
6608 		if (rem < count) {
6609 			rem = 0;
6610 			iter->seq.seq.len = save_len;
6611 			break;
6612 		}
6613 
6614 		if (ret != TRACE_TYPE_NO_CONSUME)
6615 			trace_consume(iter);
6616 		rem -= count;
6617 		if (!trace_find_next_entry_inc(iter))	{
6618 			rem = 0;
6619 			iter->ent = NULL;
6620 			break;
6621 		}
6622 	}
6623 
6624 	return rem;
6625 }
6626 
6627 static ssize_t tracing_splice_read_pipe(struct file *filp,
6628 					loff_t *ppos,
6629 					struct pipe_inode_info *pipe,
6630 					size_t len,
6631 					unsigned int flags)
6632 {
6633 	struct page *pages_def[PIPE_DEF_BUFFERS];
6634 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6635 	struct trace_iterator *iter = filp->private_data;
6636 	struct splice_pipe_desc spd = {
6637 		.pages		= pages_def,
6638 		.partial	= partial_def,
6639 		.nr_pages	= 0, /* This gets updated below. */
6640 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6641 		.ops		= &default_pipe_buf_ops,
6642 		.spd_release	= tracing_spd_release_pipe,
6643 	};
6644 	ssize_t ret;
6645 	size_t rem;
6646 	unsigned int i;
6647 
6648 	if (splice_grow_spd(pipe, &spd))
6649 		return -ENOMEM;
6650 
6651 	mutex_lock(&iter->mutex);
6652 
6653 	if (iter->trace->splice_read) {
6654 		ret = iter->trace->splice_read(iter, filp,
6655 					       ppos, pipe, len, flags);
6656 		if (ret)
6657 			goto out_err;
6658 	}
6659 
6660 	ret = tracing_wait_pipe(filp);
6661 	if (ret <= 0)
6662 		goto out_err;
6663 
6664 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6665 		ret = -EFAULT;
6666 		goto out_err;
6667 	}
6668 
6669 	trace_event_read_lock();
6670 	trace_access_lock(iter->cpu_file);
6671 
6672 	/* Fill as many pages as possible. */
6673 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6674 		spd.pages[i] = alloc_page(GFP_KERNEL);
6675 		if (!spd.pages[i])
6676 			break;
6677 
6678 		rem = tracing_fill_pipe_page(rem, iter);
6679 
6680 		/* Copy the data into the page, so we can start over. */
6681 		ret = trace_seq_to_buffer(&iter->seq,
6682 					  page_address(spd.pages[i]),
6683 					  trace_seq_used(&iter->seq));
6684 		if (ret < 0) {
6685 			__free_page(spd.pages[i]);
6686 			break;
6687 		}
6688 		spd.partial[i].offset = 0;
6689 		spd.partial[i].len = trace_seq_used(&iter->seq);
6690 
6691 		trace_seq_init(&iter->seq);
6692 	}
6693 
6694 	trace_access_unlock(iter->cpu_file);
6695 	trace_event_read_unlock();
6696 	mutex_unlock(&iter->mutex);
6697 
6698 	spd.nr_pages = i;
6699 
6700 	if (i)
6701 		ret = splice_to_pipe(pipe, &spd);
6702 	else
6703 		ret = 0;
6704 out:
6705 	splice_shrink_spd(&spd);
6706 	return ret;
6707 
6708 out_err:
6709 	mutex_unlock(&iter->mutex);
6710 	goto out;
6711 }
6712 
6713 static ssize_t
6714 tracing_entries_read(struct file *filp, char __user *ubuf,
6715 		     size_t cnt, loff_t *ppos)
6716 {
6717 	struct inode *inode = file_inode(filp);
6718 	struct trace_array *tr = inode->i_private;
6719 	int cpu = tracing_get_cpu(inode);
6720 	char buf[64];
6721 	int r = 0;
6722 	ssize_t ret;
6723 
6724 	mutex_lock(&trace_types_lock);
6725 
6726 	if (cpu == RING_BUFFER_ALL_CPUS) {
6727 		int cpu, buf_size_same;
6728 		unsigned long size;
6729 
6730 		size = 0;
6731 		buf_size_same = 1;
6732 		/* check if all cpu sizes are same */
6733 		for_each_tracing_cpu(cpu) {
6734 			/* fill in the size from first enabled cpu */
6735 			if (size == 0)
6736 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6737 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6738 				buf_size_same = 0;
6739 				break;
6740 			}
6741 		}
6742 
6743 		if (buf_size_same) {
6744 			if (!ring_buffer_expanded)
6745 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6746 					    size >> 10,
6747 					    trace_buf_size >> 10);
6748 			else
6749 				r = sprintf(buf, "%lu\n", size >> 10);
6750 		} else
6751 			r = sprintf(buf, "X\n");
6752 	} else
6753 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6754 
6755 	mutex_unlock(&trace_types_lock);
6756 
6757 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6758 	return ret;
6759 }
6760 
6761 static ssize_t
6762 tracing_entries_write(struct file *filp, const char __user *ubuf,
6763 		      size_t cnt, loff_t *ppos)
6764 {
6765 	struct inode *inode = file_inode(filp);
6766 	struct trace_array *tr = inode->i_private;
6767 	unsigned long val;
6768 	int ret;
6769 
6770 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6771 	if (ret)
6772 		return ret;
6773 
6774 	/* must have at least 1 entry */
6775 	if (!val)
6776 		return -EINVAL;
6777 
6778 	/* value is in KB */
6779 	val <<= 10;
6780 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6781 	if (ret < 0)
6782 		return ret;
6783 
6784 	*ppos += cnt;
6785 
6786 	return cnt;
6787 }
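
/*
 * Example (illustrative): values written to buffer_size_kb are in KiB
 * per CPU, as the write handler above shifts the value by 10; the
 * per_cpu/cpuN/buffer_size_kb files resize a single CPU's buffer:
 *
 *	echo 4096 > buffer_size_kb
 *	echo 8192 > per_cpu/cpu0/buffer_size_kb
 */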
6788 
6789 static ssize_t
6790 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6791 				size_t cnt, loff_t *ppos)
6792 {
6793 	struct trace_array *tr = filp->private_data;
6794 	char buf[64];
6795 	int r, cpu;
6796 	unsigned long size = 0, expanded_size = 0;
6797 
6798 	mutex_lock(&trace_types_lock);
6799 	for_each_tracing_cpu(cpu) {
6800 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6801 		if (!ring_buffer_expanded)
6802 			expanded_size += trace_buf_size >> 10;
6803 	}
6804 	if (ring_buffer_expanded)
6805 		r = sprintf(buf, "%lu\n", size);
6806 	else
6807 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6808 	mutex_unlock(&trace_types_lock);
6809 
6810 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6811 }
6812 
6813 static ssize_t
6814 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6815 			  size_t cnt, loff_t *ppos)
6816 {
6817 	/*
6818 	 * There is no need to read what the user has written; this function
6819 	 * just makes sure that there is no error when "echo" is used.
6820 	 */
6821 
6822 	*ppos += cnt;
6823 
6824 	return cnt;
6825 }
6826 
6827 static int
6828 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6829 {
6830 	struct trace_array *tr = inode->i_private;
6831 
6832 	/* disable tracing ? */
6833 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6834 		tracer_tracing_off(tr);
6835 	/* resize the ring buffer to 0 */
6836 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6837 
6838 	trace_array_put(tr);
6839 
6840 	return 0;
6841 }
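
/*
 * Example (illustrative): closing the free_buffer file shrinks the ring
 * buffer to zero, and with the "disable_on_free" option set it also
 * turns tracing off first:
 *
 *	echo 1 > options/disable_on_free
 *	echo > free_buffer
 */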
6842 
6843 static ssize_t
6844 tracing_mark_write(struct file *filp, const char __user *ubuf,
6845 					size_t cnt, loff_t *fpos)
6846 {
6847 	struct trace_array *tr = filp->private_data;
6848 	struct ring_buffer_event *event;
6849 	enum event_trigger_type tt = ETT_NONE;
6850 	struct trace_buffer *buffer;
6851 	struct print_entry *entry;
6852 	unsigned long irq_flags;
6853 	ssize_t written;
6854 	int size;
6855 	int len;
6856 
6857 /* Used in tracing_mark_raw_write() as well */
6858 #define FAULTED_STR "<faulted>"
6859 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6860 
6861 	if (tracing_disabled)
6862 		return -EINVAL;
6863 
6864 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6865 		return -EINVAL;
6866 
6867 	if (cnt > TRACE_BUF_SIZE)
6868 		cnt = TRACE_BUF_SIZE;
6869 
6870 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6871 
6872 	local_save_flags(irq_flags);
6873 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6874 
6875 	/* If less than "<faulted>", then make sure we can still add that */
6876 	if (cnt < FAULTED_SIZE)
6877 		size += FAULTED_SIZE - cnt;
6878 
6879 	buffer = tr->array_buffer.buffer;
6880 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6881 					    irq_flags, preempt_count());
6882 	if (unlikely(!event))
6883 		/* Ring buffer disabled, return as if not open for write */
6884 		return -EBADF;
6885 
6886 	entry = ring_buffer_event_data(event);
6887 	entry->ip = _THIS_IP_;
6888 
6889 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6890 	if (len) {
6891 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6892 		cnt = FAULTED_SIZE;
6893 		written = -EFAULT;
6894 	} else
6895 		written = cnt;
6896 
6897 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6898 		/* do not add \n before testing triggers, but add \0 */
6899 		entry->buf[cnt] = '\0';
6900 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6901 	}
6902 
6903 	if (entry->buf[cnt - 1] != '\n') {
6904 		entry->buf[cnt] = '\n';
6905 		entry->buf[cnt + 1] = '\0';
6906 	} else
6907 		entry->buf[cnt] = '\0';
6908 
6909 	if (static_branch_unlikely(&trace_marker_exports_enabled))
6910 		ftrace_exports(event, TRACE_EXPORT_MARKER);
6911 	__buffer_unlock_commit(buffer, event);
6912 
6913 	if (tt)
6914 		event_triggers_post_call(tr->trace_marker_file, tt);
6915 
6916 	if (written > 0)
6917 		*fpos += written;
6918 
6919 	return written;
6920 }
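
/*
 * Example (illustrative): strings written to trace_marker show up in the
 * trace as print events, with a newline appended if one was missing:
 *
 *	echo "hello world" > trace_marker
 *	cat trace | grep hello
 */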
6921 
6922 /* Limit it for now to 3K (including tag) */
6923 #define RAW_DATA_MAX_SIZE (1024*3)
6924 
6925 static ssize_t
6926 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6927 					size_t cnt, loff_t *fpos)
6928 {
6929 	struct trace_array *tr = filp->private_data;
6930 	struct ring_buffer_event *event;
6931 	struct trace_buffer *buffer;
6932 	struct raw_data_entry *entry;
6933 	unsigned long irq_flags;
6934 	ssize_t written;
6935 	int size;
6936 	int len;
6937 
6938 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6939 
6940 	if (tracing_disabled)
6941 		return -EINVAL;
6942 
6943 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6944 		return -EINVAL;
6945 
6946 	/* The marker must at least have a tag id */
6947 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6948 		return -EINVAL;
6949 
6950 	if (cnt > TRACE_BUF_SIZE)
6951 		cnt = TRACE_BUF_SIZE;
6952 
6953 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6954 
6955 	local_save_flags(irq_flags);
6956 	size = sizeof(*entry) + cnt;
6957 	if (cnt < FAULT_SIZE_ID)
6958 		size += FAULT_SIZE_ID - cnt;
6959 
6960 	buffer = tr->array_buffer.buffer;
6961 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6962 					    irq_flags, preempt_count());
6963 	if (!event)
6964 		/* Ring buffer disabled, return as if not open for write */
6965 		return -EBADF;
6966 
6967 	entry = ring_buffer_event_data(event);
6968 
6969 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6970 	if (len) {
6971 		entry->id = -1;
6972 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6973 		written = -EFAULT;
6974 	} else
6975 		written = cnt;
6976 
6977 	__buffer_unlock_commit(buffer, event);
6978 
6979 	if (written > 0)
6980 		*fpos += written;
6981 
6982 	return written;
6983 }
6984 
6985 static int tracing_clock_show(struct seq_file *m, void *v)
6986 {
6987 	struct trace_array *tr = m->private;
6988 	int i;
6989 
6990 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6991 		seq_printf(m,
6992 			"%s%s%s%s", i ? " " : "",
6993 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6994 			i == tr->clock_id ? "]" : "");
6995 	seq_putc(m, '\n');
6996 
6997 	return 0;
6998 }
6999 
7000 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7001 {
7002 	int i;
7003 
7004 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7005 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7006 			break;
7007 	}
7008 	if (i == ARRAY_SIZE(trace_clocks))
7009 		return -EINVAL;
7010 
7011 	mutex_lock(&trace_types_lock);
7012 
7013 	tr->clock_id = i;
7014 
7015 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7016 
7017 	/*
7018 	 * New clock may not be consistent with the previous clock.
7019 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7020 	 */
7021 	tracing_reset_online_cpus(&tr->array_buffer);
7022 
7023 #ifdef CONFIG_TRACER_MAX_TRACE
7024 	if (tr->max_buffer.buffer)
7025 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7026 	tracing_reset_online_cpus(&tr->max_buffer);
7027 #endif
7028 
7029 	mutex_unlock(&trace_types_lock);
7030 
7031 	return 0;
7032 }
7033 
7034 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7035 				   size_t cnt, loff_t *fpos)
7036 {
7037 	struct seq_file *m = filp->private_data;
7038 	struct trace_array *tr = m->private;
7039 	char buf[64];
7040 	const char *clockstr;
7041 	int ret;
7042 
7043 	if (cnt >= sizeof(buf))
7044 		return -EINVAL;
7045 
7046 	if (copy_from_user(buf, ubuf, cnt))
7047 		return -EFAULT;
7048 
7049 	buf[cnt] = 0;
7050 
7051 	clockstr = strstrip(buf);
7052 
7053 	ret = tracing_set_clock(tr, clockstr);
7054 	if (ret)
7055 		return ret;
7056 
7057 	*fpos += cnt;
7058 
7059 	return cnt;
7060 }
7061 
7062 static int tracing_clock_open(struct inode *inode, struct file *file)
7063 {
7064 	struct trace_array *tr = inode->i_private;
7065 	int ret;
7066 
7067 	ret = tracing_check_open_get_tr(tr);
7068 	if (ret)
7069 		return ret;
7070 
7071 	ret = single_open(file, tracing_clock_show, inode->i_private);
7072 	if (ret < 0)
7073 		trace_array_put(tr);
7074 
7075 	return ret;
7076 }
7077 
7078 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7079 {
7080 	struct trace_array *tr = m->private;
7081 
7082 	mutex_lock(&trace_types_lock);
7083 
7084 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7085 		seq_puts(m, "delta [absolute]\n");
7086 	else
7087 		seq_puts(m, "[delta] absolute\n");
7088 
7089 	mutex_unlock(&trace_types_lock);
7090 
7091 	return 0;
7092 }
7093 
7094 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7095 {
7096 	struct trace_array *tr = inode->i_private;
7097 	int ret;
7098 
7099 	ret = tracing_check_open_get_tr(tr);
7100 	if (ret)
7101 		return ret;
7102 
7103 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7104 	if (ret < 0)
7105 		trace_array_put(tr);
7106 
7107 	return ret;
7108 }
7109 
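/*
 * Absolute timestamp mode is reference counted: several users (hist
 * triggers, for example) may request it at the same time, and the ring
 * buffer is only switched back to delta timestamps when the last user
 * drops its reference.
 */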
7110 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
7111 {
7112 	int ret = 0;
7113 
7114 	mutex_lock(&trace_types_lock);
7115 
7116 	if (abs && tr->time_stamp_abs_ref++)
7117 		goto out;
7118 
7119 	if (!abs) {
7120 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
7121 			ret = -EINVAL;
7122 			goto out;
7123 		}
7124 
7125 		if (--tr->time_stamp_abs_ref)
7126 			goto out;
7127 	}
7128 
7129 	ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
7130 
7131 #ifdef CONFIG_TRACER_MAX_TRACE
7132 	if (tr->max_buffer.buffer)
7133 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
7134 #endif
7135  out:
7136 	mutex_unlock(&trace_types_lock);
7137 
7138 	return ret;
7139 }
7140 
7141 struct ftrace_buffer_info {
7142 	struct trace_iterator	iter;
7143 	void			*spare;
7144 	unsigned int		spare_cpu;
7145 	unsigned int		read;
7146 };
7147 
7148 #ifdef CONFIG_TRACER_SNAPSHOT
7149 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7150 {
7151 	struct trace_array *tr = inode->i_private;
7152 	struct trace_iterator *iter;
7153 	struct seq_file *m;
7154 	int ret;
7155 
7156 	ret = tracing_check_open_get_tr(tr);
7157 	if (ret)
7158 		return ret;
7159 
7160 	if (file->f_mode & FMODE_READ) {
7161 		iter = __tracing_open(inode, file, true);
7162 		if (IS_ERR(iter))
7163 			ret = PTR_ERR(iter);
7164 	} else {
7165 		/* Writes still need the seq_file to hold the private data */
7166 		ret = -ENOMEM;
7167 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7168 		if (!m)
7169 			goto out;
7170 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7171 		if (!iter) {
7172 			kfree(m);
7173 			goto out;
7174 		}
7175 		ret = 0;
7176 
7177 		iter->tr = tr;
7178 		iter->array_buffer = &tr->max_buffer;
7179 		iter->cpu_file = tracing_get_cpu(inode);
7180 		m->private = iter;
7181 		file->private_data = m;
7182 	}
7183 out:
7184 	if (ret < 0)
7185 		trace_array_put(tr);
7186 
7187 	return ret;
7188 }
7189 
7190 static void tracing_swap_cpu_buffer(void *tr)
7191 {
7192 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7193 }
7194 
7195 static ssize_t
7196 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7197 		       loff_t *ppos)
7198 {
7199 	struct seq_file *m = filp->private_data;
7200 	struct trace_iterator *iter = m->private;
7201 	struct trace_array *tr = iter->tr;
7202 	unsigned long val;
7203 	int ret;
7204 
7205 	ret = tracing_update_buffers();
7206 	if (ret < 0)
7207 		return ret;
7208 
7209 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7210 	if (ret)
7211 		return ret;
7212 
7213 	mutex_lock(&trace_types_lock);
7214 
7215 	if (tr->current_trace->use_max_tr) {
7216 		ret = -EBUSY;
7217 		goto out;
7218 	}
7219 
7220 	local_irq_disable();
7221 	arch_spin_lock(&tr->max_lock);
7222 	if (tr->cond_snapshot)
7223 		ret = -EBUSY;
7224 	arch_spin_unlock(&tr->max_lock);
7225 	local_irq_enable();
7226 	if (ret)
7227 		goto out;
7228 
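	/*
	 * The value written selects the action: 0 frees the snapshot
	 * buffer (only valid on the all-CPUs file), 1 allocates the
	 * snapshot buffer if needed and swaps it with the live buffer,
	 * and any other value simply clears the snapshot contents.
	 */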
7229 	switch (val) {
7230 	case 0:
7231 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7232 			ret = -EINVAL;
7233 			break;
7234 		}
7235 		if (tr->allocated_snapshot)
7236 			free_snapshot(tr);
7237 		break;
7238 	case 1:
7239 /* Only allow per-cpu swap if the ring buffer supports it */
7240 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7241 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7242 			ret = -EINVAL;
7243 			break;
7244 		}
7245 #endif
7246 		if (tr->allocated_snapshot)
7247 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7248 					&tr->array_buffer, iter->cpu_file);
7249 		else
7250 			ret = tracing_alloc_snapshot_instance(tr);
7251 		if (ret < 0)
7252 			break;
7253 		/* Now, we're going to swap */
7254 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7255 			local_irq_disable();
7256 			update_max_tr(tr, current, smp_processor_id(), NULL);
7257 			local_irq_enable();
7258 		} else {
7259 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7260 						 (void *)tr, 1);
7261 		}
7262 		break;
7263 	default:
7264 		if (tr->allocated_snapshot) {
7265 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7266 				tracing_reset_online_cpus(&tr->max_buffer);
7267 			else
7268 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7269 		}
7270 		break;
7271 	}
7272 
7273 	if (ret >= 0) {
7274 		*ppos += cnt;
7275 		ret = cnt;
7276 	}
7277 out:
7278 	mutex_unlock(&trace_types_lock);
7279 	return ret;
7280 }
7281 
7282 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7283 {
7284 	struct seq_file *m = file->private_data;
7285 	int ret;
7286 
7287 	ret = tracing_release(inode, file);
7288 
7289 	if (file->f_mode & FMODE_READ)
7290 		return ret;
7291 
7292 	/* If write only, the seq_file is just a stub */
7293 	if (m)
7294 		kfree(m->private);
7295 	kfree(m);
7296 
7297 	return 0;
7298 }
7299 
7300 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7301 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7302 				    size_t count, loff_t *ppos);
7303 static int tracing_buffers_release(struct inode *inode, struct file *file);
7304 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7305 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7306 
7307 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7308 {
7309 	struct ftrace_buffer_info *info;
7310 	int ret;
7311 
7312 	/* The following checks for tracefs lockdown */
7313 	ret = tracing_buffers_open(inode, filp);
7314 	if (ret < 0)
7315 		return ret;
7316 
7317 	info = filp->private_data;
7318 
7319 	if (info->iter.trace->use_max_tr) {
7320 		tracing_buffers_release(inode, filp);
7321 		return -EBUSY;
7322 	}
7323 
7324 	info->iter.snapshot = true;
7325 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7326 
7327 	return ret;
7328 }
7329 
7330 #endif /* CONFIG_TRACER_SNAPSHOT */
7331 
7332 
7333 static const struct file_operations tracing_thresh_fops = {
7334 	.open		= tracing_open_generic,
7335 	.read		= tracing_thresh_read,
7336 	.write		= tracing_thresh_write,
7337 	.llseek		= generic_file_llseek,
7338 };
7339 
7340 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7341 static const struct file_operations tracing_max_lat_fops = {
7342 	.open		= tracing_open_generic,
7343 	.read		= tracing_max_lat_read,
7344 	.write		= tracing_max_lat_write,
7345 	.llseek		= generic_file_llseek,
7346 };
7347 #endif
7348 
7349 static const struct file_operations set_tracer_fops = {
7350 	.open		= tracing_open_generic_tr,
7351 	.read		= tracing_set_trace_read,
7352 	.write		= tracing_set_trace_write,
7353 	.llseek		= generic_file_llseek,
7354 	.release	= tracing_release_generic_tr,
7355 };
7356 
7357 static const struct file_operations tracing_pipe_fops = {
7358 	.open		= tracing_open_pipe,
7359 	.poll		= tracing_poll_pipe,
7360 	.read		= tracing_read_pipe,
7361 	.splice_read	= tracing_splice_read_pipe,
7362 	.release	= tracing_release_pipe,
7363 	.llseek		= no_llseek,
7364 };
7365 
7366 static const struct file_operations tracing_entries_fops = {
7367 	.open		= tracing_open_generic_tr,
7368 	.read		= tracing_entries_read,
7369 	.write		= tracing_entries_write,
7370 	.llseek		= generic_file_llseek,
7371 	.release	= tracing_release_generic_tr,
7372 };
7373 
7374 static const struct file_operations tracing_total_entries_fops = {
7375 	.open		= tracing_open_generic_tr,
7376 	.read		= tracing_total_entries_read,
7377 	.llseek		= generic_file_llseek,
7378 	.release	= tracing_release_generic_tr,
7379 };
7380 
7381 static const struct file_operations tracing_free_buffer_fops = {
7382 	.open		= tracing_open_generic_tr,
7383 	.write		= tracing_free_buffer_write,
7384 	.release	= tracing_free_buffer_release,
7385 };
7386 
7387 static const struct file_operations tracing_mark_fops = {
7388 	.open		= tracing_open_generic_tr,
7389 	.write		= tracing_mark_write,
7390 	.llseek		= generic_file_llseek,
7391 	.release	= tracing_release_generic_tr,
7392 };
7393 
7394 static const struct file_operations tracing_mark_raw_fops = {
7395 	.open		= tracing_open_generic_tr,
7396 	.write		= tracing_mark_raw_write,
7397 	.llseek		= generic_file_llseek,
7398 	.release	= tracing_release_generic_tr,
7399 };
7400 
7401 static const struct file_operations trace_clock_fops = {
7402 	.open		= tracing_clock_open,
7403 	.read		= seq_read,
7404 	.llseek		= seq_lseek,
7405 	.release	= tracing_single_release_tr,
7406 	.write		= tracing_clock_write,
7407 };
7408 
7409 static const struct file_operations trace_time_stamp_mode_fops = {
7410 	.open		= tracing_time_stamp_mode_open,
7411 	.read		= seq_read,
7412 	.llseek		= seq_lseek,
7413 	.release	= tracing_single_release_tr,
7414 };
7415 
7416 #ifdef CONFIG_TRACER_SNAPSHOT
7417 static const struct file_operations snapshot_fops = {
7418 	.open		= tracing_snapshot_open,
7419 	.read		= seq_read,
7420 	.write		= tracing_snapshot_write,
7421 	.llseek		= tracing_lseek,
7422 	.release	= tracing_snapshot_release,
7423 };
7424 
7425 static const struct file_operations snapshot_raw_fops = {
7426 	.open		= snapshot_raw_open,
7427 	.read		= tracing_buffers_read,
7428 	.release	= tracing_buffers_release,
7429 	.splice_read	= tracing_buffers_splice_read,
7430 	.llseek		= no_llseek,
7431 };
7432 
7433 #endif /* CONFIG_TRACER_SNAPSHOT */
7434 
7435 #define TRACING_LOG_ERRS_MAX	8
7436 #define TRACING_LOG_LOC_MAX	128
7437 
7438 #define CMD_PREFIX "  Command: "
7439 
7440 struct err_info {
7441 	const char	**errs;	/* ptr to loc-specific array of err strings */
7442 	u8		type;	/* index into errs -> specific err string */
7443 	u8		pos;	/* MAX_FILTER_STR_VAL = 256 */
7444 	u64		ts;
7445 };
7446 
7447 struct tracing_log_err {
7448 	struct list_head	list;
7449 	struct err_info		info;
7450 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7451 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7452 };
7453 
7454 static DEFINE_MUTEX(tracing_err_log_lock);
7455 
7456 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7457 {
7458 	struct tracing_log_err *err;
7459 
7460 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7461 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7462 		if (!err)
7463 			err = ERR_PTR(-ENOMEM);
7464 		else
7465 			tr->n_err_log_entries++;
7466 
7467 		return err;
7468 	}
7469 
7470 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7471 	list_del(&err->list);
7472 
7473 	return err;
7474 }
7475 
7476 /**
7477  * err_pos - find the position of a string within a command for error careting
7478  * @cmd: The tracing command that caused the error
7479  * @str: The string to position the caret at within @cmd
7480  *
7481  * Finds the position of the first occurrence of @str within @cmd.  The
7482  * return value can be passed to tracing_log_err() for caret placement
7483  * within @cmd.
7484  *
7485  * Returns the index within @cmd of the first occurrence of @str or 0
7486  * if @str was not found.
7487  */
7488 unsigned int err_pos(char *cmd, const char *str)
7489 {
7490 	char *found;
7491 
7492 	if (WARN_ON(!strlen(cmd)))
7493 		return 0;
7494 
7495 	found = strstr(cmd, str);
7496 	if (found)
7497 		return found - cmd;
7498 
7499 	return 0;
7500 }
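/*
 * For example, err_pos("hist:keys=bad", "bad") would return 10, placing
 * the error caret under the 'b' when the command is echoed back in the
 * error log (the command string here is purely illustrative).
 */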
7501 
7502 /**
7503  * tracing_log_err - write an error to the tracing error log
7504  * @tr: The associated trace array for the error (NULL for top level array)
7505  * @loc: A string describing where the error occurred
7506  * @cmd: The tracing command that caused the error
7507  * @errs: The array of loc-specific static error strings
7508  * @type: The index into errs[], which produces the specific static err string
7509  * @pos: The position the caret should be placed in the cmd
7510  *
7511  * Writes an error into tracing/error_log of the form:
7512  *
7513  * <loc>: error: <text>
7514  *   Command: <cmd>
7515  *              ^
7516  *
7517  * tracing/error_log is a small log file containing the last
7518  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7519  * unless there has been a tracing error, and the error log can be
7520  * cleared and have its memory freed by writing the empty string in
7521  * truncation mode to it, i.e. echo > tracing/error_log.
7522  *
7523  * NOTE: the @errs array along with the @type param are used to
7524  * produce a static error string - this string is not copied and saved
7525  * when the error is logged - only a pointer to it is saved.  See
7526  * existing callers for examples of how static strings are typically
7527  * defined for use with tracing_log_err().
7528  */
7529 void tracing_log_err(struct trace_array *tr,
7530 		     const char *loc, const char *cmd,
7531 		     const char **errs, u8 type, u8 pos)
7532 {
7533 	struct tracing_log_err *err;
7534 
7535 	if (!tr)
7536 		tr = &global_trace;
7537 
7538 	mutex_lock(&tracing_err_log_lock);
7539 	err = get_tracing_log_err(tr);
7540 	if (PTR_ERR(err) == -ENOMEM) {
7541 		mutex_unlock(&tracing_err_log_lock);
7542 		return;
7543 	}
7544 
7545 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7546 	snprintf(err->cmd, MAX_FILTER_STR_VAL,"\n" CMD_PREFIX "%s\n", cmd);
7547 
7548 	err->info.errs = errs;
7549 	err->info.type = type;
7550 	err->info.pos = pos;
7551 	err->info.ts = local_clock();
7552 
7553 	list_add_tail(&err->list, &tr->err_log);
7554 	mutex_unlock(&tracing_err_log_lock);
7555 }
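/*
 * A minimal sketch of how a caller typically wires this up (the names
 * below are hypothetical, not taken from this file):
 *
 *	static const char *my_errs[] = { "Invalid key", "Missing value" };
 *	#define MY_ERR_INVALID_KEY	0
 *
 *	tracing_log_err(tr, "my_subsys", cmd, my_errs,
 *			MY_ERR_INVALID_KEY, err_pos(cmd, bad_token));
 */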
7556 
7557 static void clear_tracing_err_log(struct trace_array *tr)
7558 {
7559 	struct tracing_log_err *err, *next;
7560 
7561 	mutex_lock(&tracing_err_log_lock);
7562 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7563 		list_del(&err->list);
7564 		kfree(err);
7565 	}
7566 
7567 	tr->n_err_log_entries = 0;
7568 	mutex_unlock(&tracing_err_log_lock);
7569 }
7570 
7571 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7572 {
7573 	struct trace_array *tr = m->private;
7574 
7575 	mutex_lock(&tracing_err_log_lock);
7576 
7577 	return seq_list_start(&tr->err_log, *pos);
7578 }
7579 
7580 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7581 {
7582 	struct trace_array *tr = m->private;
7583 
7584 	return seq_list_next(v, &tr->err_log, pos);
7585 }
7586 
7587 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7588 {
7589 	mutex_unlock(&tracing_err_log_lock);
7590 }
7591 
7592 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7593 {
7594 	u8 i;
7595 
7596 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7597 		seq_putc(m, ' ');
7598 	for (i = 0; i < pos; i++)
7599 		seq_putc(m, ' ');
7600 	seq_puts(m, "^\n");
7601 }
7602 
7603 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7604 {
7605 	struct tracing_log_err *err = v;
7606 
7607 	if (err) {
7608 		const char *err_text = err->info.errs[err->info.type];
7609 		u64 sec = err->info.ts;
7610 		u32 nsec;
7611 
7612 		nsec = do_div(sec, NSEC_PER_SEC);
7613 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7614 			   err->loc, err_text);
7615 		seq_printf(m, "%s", err->cmd);
7616 		tracing_err_log_show_pos(m, err->info.pos);
7617 	}
7618 
7619 	return 0;
7620 }
7621 
7622 static const struct seq_operations tracing_err_log_seq_ops = {
7623 	.start  = tracing_err_log_seq_start,
7624 	.next   = tracing_err_log_seq_next,
7625 	.stop   = tracing_err_log_seq_stop,
7626 	.show   = tracing_err_log_seq_show
7627 };
7628 
7629 static int tracing_err_log_open(struct inode *inode, struct file *file)
7630 {
7631 	struct trace_array *tr = inode->i_private;
7632 	int ret = 0;
7633 
7634 	ret = tracing_check_open_get_tr(tr);
7635 	if (ret)
7636 		return ret;
7637 
7638 	/* If this file was opened for write, then erase contents */
7639 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7640 		clear_tracing_err_log(tr);
7641 
7642 	if (file->f_mode & FMODE_READ) {
7643 		ret = seq_open(file, &tracing_err_log_seq_ops);
7644 		if (!ret) {
7645 			struct seq_file *m = file->private_data;
7646 			m->private = tr;
7647 		} else {
7648 			trace_array_put(tr);
7649 		}
7650 	}
7651 	return ret;
7652 }
7653 
7654 static ssize_t tracing_err_log_write(struct file *file,
7655 				     const char __user *buffer,
7656 				     size_t count, loff_t *ppos)
7657 {
7658 	return count;
7659 }
7660 
7661 static int tracing_err_log_release(struct inode *inode, struct file *file)
7662 {
7663 	struct trace_array *tr = inode->i_private;
7664 
7665 	trace_array_put(tr);
7666 
7667 	if (file->f_mode & FMODE_READ)
7668 		seq_release(inode, file);
7669 
7670 	return 0;
7671 }
7672 
7673 static const struct file_operations tracing_err_log_fops = {
7674 	.open           = tracing_err_log_open,
7675 	.write		= tracing_err_log_write,
7676 	.read           = seq_read,
7677 	.llseek         = tracing_lseek,
7678 	.release        = tracing_err_log_release,
7679 };
7680 
7681 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7682 {
7683 	struct trace_array *tr = inode->i_private;
7684 	struct ftrace_buffer_info *info;
7685 	int ret;
7686 
7687 	ret = tracing_check_open_get_tr(tr);
7688 	if (ret)
7689 		return ret;
7690 
7691 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7692 	if (!info) {
7693 		trace_array_put(tr);
7694 		return -ENOMEM;
7695 	}
7696 
7697 	mutex_lock(&trace_types_lock);
7698 
7699 	info->iter.tr		= tr;
7700 	info->iter.cpu_file	= tracing_get_cpu(inode);
7701 	info->iter.trace	= tr->current_trace;
7702 	info->iter.array_buffer = &tr->array_buffer;
7703 	info->spare		= NULL;
7704 	/* Force reading ring buffer for first read */
7705 	info->read		= (unsigned int)-1;
7706 
7707 	filp->private_data = info;
7708 
7709 	tr->trace_ref++;
7710 
7711 	mutex_unlock(&trace_types_lock);
7712 
7713 	ret = nonseekable_open(inode, filp);
7714 	if (ret < 0)
7715 		trace_array_put(tr);
7716 
7717 	return ret;
7718 }
7719 
7720 static __poll_t
7721 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7722 {
7723 	struct ftrace_buffer_info *info = filp->private_data;
7724 	struct trace_iterator *iter = &info->iter;
7725 
7726 	return trace_poll(iter, filp, poll_table);
7727 }
7728 
7729 static ssize_t
7730 tracing_buffers_read(struct file *filp, char __user *ubuf,
7731 		     size_t count, loff_t *ppos)
7732 {
7733 	struct ftrace_buffer_info *info = filp->private_data;
7734 	struct trace_iterator *iter = &info->iter;
7735 	ssize_t ret = 0;
7736 	ssize_t size;
7737 
7738 	if (!count)
7739 		return 0;
7740 
7741 #ifdef CONFIG_TRACER_MAX_TRACE
7742 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7743 		return -EBUSY;
7744 #endif
7745 
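	/*
	 * A "spare" ring buffer page is kept around so a full page of
	 * events can be pulled out of the ring buffer once and then
	 * handed to user space in smaller chunks; info->read tracks how
	 * much of that page has already been consumed.
	 */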
7746 	if (!info->spare) {
7747 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7748 							  iter->cpu_file);
7749 		if (IS_ERR(info->spare)) {
7750 			ret = PTR_ERR(info->spare);
7751 			info->spare = NULL;
7752 		} else {
7753 			info->spare_cpu = iter->cpu_file;
7754 		}
7755 	}
7756 	if (!info->spare)
7757 		return ret;
7758 
7759 	/* Do we have previous read data to read? */
7760 	if (info->read < PAGE_SIZE)
7761 		goto read;
7762 
7763  again:
7764 	trace_access_lock(iter->cpu_file);
7765 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7766 				    &info->spare,
7767 				    count,
7768 				    iter->cpu_file, 0);
7769 	trace_access_unlock(iter->cpu_file);
7770 
7771 	if (ret < 0) {
7772 		if (trace_empty(iter)) {
7773 			if ((filp->f_flags & O_NONBLOCK))
7774 				return -EAGAIN;
7775 
7776 			ret = wait_on_pipe(iter, 0);
7777 			if (ret)
7778 				return ret;
7779 
7780 			goto again;
7781 		}
7782 		return 0;
7783 	}
7784 
7785 	info->read = 0;
7786  read:
7787 	size = PAGE_SIZE - info->read;
7788 	if (size > count)
7789 		size = count;
7790 
7791 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7792 	if (ret == size)
7793 		return -EFAULT;
7794 
7795 	size -= ret;
7796 
7797 	*ppos += size;
7798 	info->read += size;
7799 
7800 	return size;
7801 }
7802 
7803 static int tracing_buffers_release(struct inode *inode, struct file *file)
7804 {
7805 	struct ftrace_buffer_info *info = file->private_data;
7806 	struct trace_iterator *iter = &info->iter;
7807 
7808 	mutex_lock(&trace_types_lock);
7809 
7810 	iter->tr->trace_ref--;
7811 
7812 	__trace_array_put(iter->tr);
7813 
7814 	if (info->spare)
7815 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7816 					   info->spare_cpu, info->spare);
7817 	kvfree(info);
7818 
7819 	mutex_unlock(&trace_types_lock);
7820 
7821 	return 0;
7822 }
7823 
7824 struct buffer_ref {
7825 	struct trace_buffer	*buffer;
7826 	void			*page;
7827 	int			cpu;
7828 	refcount_t		refcount;
7829 };
7830 
7831 static void buffer_ref_release(struct buffer_ref *ref)
7832 {
7833 	if (!refcount_dec_and_test(&ref->refcount))
7834 		return;
7835 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7836 	kfree(ref);
7837 }
7838 
7839 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7840 				    struct pipe_buffer *buf)
7841 {
7842 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7843 
7844 	buffer_ref_release(ref);
7845 	buf->private = 0;
7846 }
7847 
7848 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7849 				struct pipe_buffer *buf)
7850 {
7851 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7852 
7853 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7854 		return false;
7855 
7856 	refcount_inc(&ref->refcount);
7857 	return true;
7858 }
7859 
7860 /* Pipe buffer operations for a buffer. */
7861 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7862 	.release		= buffer_pipe_buf_release,
7863 	.get			= buffer_pipe_buf_get,
7864 };
7865 
7866 /*
7867  * Callback from splice_to_pipe(), if we need to release some pages
7868  * at the end of the spd in case we errored out while filling the pipe.
7869  */
7870 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7871 {
7872 	struct buffer_ref *ref =
7873 		(struct buffer_ref *)spd->partial[i].private;
7874 
7875 	buffer_ref_release(ref);
7876 	spd->partial[i].private = 0;
7877 }
7878 
7879 static ssize_t
7880 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7881 			    struct pipe_inode_info *pipe, size_t len,
7882 			    unsigned int flags)
7883 {
7884 	struct ftrace_buffer_info *info = file->private_data;
7885 	struct trace_iterator *iter = &info->iter;
7886 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7887 	struct page *pages_def[PIPE_DEF_BUFFERS];
7888 	struct splice_pipe_desc spd = {
7889 		.pages		= pages_def,
7890 		.partial	= partial_def,
7891 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7892 		.ops		= &buffer_pipe_buf_ops,
7893 		.spd_release	= buffer_spd_release,
7894 	};
7895 	struct buffer_ref *ref;
7896 	int entries, i;
7897 	ssize_t ret = 0;
7898 
7899 #ifdef CONFIG_TRACER_MAX_TRACE
7900 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7901 		return -EBUSY;
7902 #endif
7903 
7904 	if (*ppos & (PAGE_SIZE - 1))
7905 		return -EINVAL;
7906 
7907 	if (len & (PAGE_SIZE - 1)) {
7908 		if (len < PAGE_SIZE)
7909 			return -EINVAL;
7910 		len &= PAGE_MASK;
7911 	}
7912 
7913 	if (splice_grow_spd(pipe, &spd))
7914 		return -ENOMEM;
7915 
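	/*
	 * Hand whole ring buffer pages to the pipe: each page is wrapped
	 * in a buffer_ref so it can be given to the pipe without copying
	 * and returned to the ring buffer when the pipe buffer is released.
	 */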
7916  again:
7917 	trace_access_lock(iter->cpu_file);
7918 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7919 
7920 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7921 		struct page *page;
7922 		int r;
7923 
7924 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7925 		if (!ref) {
7926 			ret = -ENOMEM;
7927 			break;
7928 		}
7929 
7930 		refcount_set(&ref->refcount, 1);
7931 		ref->buffer = iter->array_buffer->buffer;
7932 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7933 		if (IS_ERR(ref->page)) {
7934 			ret = PTR_ERR(ref->page);
7935 			ref->page = NULL;
7936 			kfree(ref);
7937 			break;
7938 		}
7939 		ref->cpu = iter->cpu_file;
7940 
7941 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7942 					  len, iter->cpu_file, 1);
7943 		if (r < 0) {
7944 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7945 						   ref->page);
7946 			kfree(ref);
7947 			break;
7948 		}
7949 
7950 		page = virt_to_page(ref->page);
7951 
7952 		spd.pages[i] = page;
7953 		spd.partial[i].len = PAGE_SIZE;
7954 		spd.partial[i].offset = 0;
7955 		spd.partial[i].private = (unsigned long)ref;
7956 		spd.nr_pages++;
7957 		*ppos += PAGE_SIZE;
7958 
7959 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7960 	}
7961 
7962 	trace_access_unlock(iter->cpu_file);
7963 	spd.nr_pages = i;
7964 
7965 	/* did we read anything? */
7966 	if (!spd.nr_pages) {
7967 		if (ret)
7968 			goto out;
7969 
7970 		ret = -EAGAIN;
7971 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7972 			goto out;
7973 
7974 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
7975 		if (ret)
7976 			goto out;
7977 
7978 		goto again;
7979 	}
7980 
7981 	ret = splice_to_pipe(pipe, &spd);
7982 out:
7983 	splice_shrink_spd(&spd);
7984 
7985 	return ret;
7986 }
7987 
7988 static const struct file_operations tracing_buffers_fops = {
7989 	.open		= tracing_buffers_open,
7990 	.read		= tracing_buffers_read,
7991 	.poll		= tracing_buffers_poll,
7992 	.release	= tracing_buffers_release,
7993 	.splice_read	= tracing_buffers_splice_read,
7994 	.llseek		= no_llseek,
7995 };
7996 
7997 static ssize_t
7998 tracing_stats_read(struct file *filp, char __user *ubuf,
7999 		   size_t count, loff_t *ppos)
8000 {
8001 	struct inode *inode = file_inode(filp);
8002 	struct trace_array *tr = inode->i_private;
8003 	struct array_buffer *trace_buf = &tr->array_buffer;
8004 	int cpu = tracing_get_cpu(inode);
8005 	struct trace_seq *s;
8006 	unsigned long cnt;
8007 	unsigned long long t;
8008 	unsigned long usec_rem;
8009 
8010 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8011 	if (!s)
8012 		return -ENOMEM;
8013 
8014 	trace_seq_init(s);
8015 
8016 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8017 	trace_seq_printf(s, "entries: %ld\n", cnt);
8018 
8019 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8020 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8021 
8022 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8023 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8024 
8025 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8026 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8027 
8028 	if (trace_clocks[tr->clock_id].in_ns) {
8029 		/* local or global for trace_clock */
8030 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8031 		usec_rem = do_div(t, USEC_PER_SEC);
8032 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8033 								t, usec_rem);
8034 
8035 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
8036 		usec_rem = do_div(t, USEC_PER_SEC);
8037 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8038 	} else {
8039 		/* counter or tsc mode for trace_clock */
8040 		trace_seq_printf(s, "oldest event ts: %llu\n",
8041 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8042 
8043 		trace_seq_printf(s, "now ts: %llu\n",
8044 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
8045 	}
8046 
8047 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8048 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8049 
8050 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8051 	trace_seq_printf(s, "read events: %ld\n", cnt);
8052 
8053 	count = simple_read_from_buffer(ubuf, count, ppos,
8054 					s->buffer, trace_seq_used(s));
8055 
8056 	kfree(s);
8057 
8058 	return count;
8059 }
8060 
8061 static const struct file_operations tracing_stats_fops = {
8062 	.open		= tracing_open_generic_tr,
8063 	.read		= tracing_stats_read,
8064 	.llseek		= generic_file_llseek,
8065 	.release	= tracing_release_generic_tr,
8066 };
8067 
8068 #ifdef CONFIG_DYNAMIC_FTRACE
8069 
8070 static ssize_t
8071 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8072 		  size_t cnt, loff_t *ppos)
8073 {
8074 	ssize_t ret;
8075 	char *buf;
8076 	int r;
8077 
8078 	/* 256 should be plenty to hold the amount needed */
8079 	buf = kmalloc(256, GFP_KERNEL);
8080 	if (!buf)
8081 		return -ENOMEM;
8082 
8083 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8084 		      ftrace_update_tot_cnt,
8085 		      ftrace_number_of_pages,
8086 		      ftrace_number_of_groups);
8087 
8088 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8089 	kfree(buf);
8090 	return ret;
8091 }
8092 
8093 static const struct file_operations tracing_dyn_info_fops = {
8094 	.open		= tracing_open_generic,
8095 	.read		= tracing_read_dyn_info,
8096 	.llseek		= generic_file_llseek,
8097 };
8098 #endif /* CONFIG_DYNAMIC_FTRACE */
8099 
8100 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8101 static void
8102 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8103 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8104 		void *data)
8105 {
8106 	tracing_snapshot_instance(tr);
8107 }
8108 
8109 static void
8110 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8111 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8112 		      void *data)
8113 {
8114 	struct ftrace_func_mapper *mapper = data;
8115 	long *count = NULL;
8116 
8117 	if (mapper)
8118 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8119 
8120 	if (count) {
8121 
8122 		if (*count <= 0)
8123 			return;
8124 
8125 		(*count)--;
8126 	}
8127 
8128 	tracing_snapshot_instance(tr);
8129 }
8130 
8131 static int
8132 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8133 		      struct ftrace_probe_ops *ops, void *data)
8134 {
8135 	struct ftrace_func_mapper *mapper = data;
8136 	long *count = NULL;
8137 
8138 	seq_printf(m, "%ps:", (void *)ip);
8139 
8140 	seq_puts(m, "snapshot");
8141 
8142 	if (mapper)
8143 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8144 
8145 	if (count)
8146 		seq_printf(m, ":count=%ld\n", *count);
8147 	else
8148 		seq_puts(m, ":unlimited\n");
8149 
8150 	return 0;
8151 }
8152 
8153 static int
8154 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8155 		     unsigned long ip, void *init_data, void **data)
8156 {
8157 	struct ftrace_func_mapper *mapper = *data;
8158 
8159 	if (!mapper) {
8160 		mapper = allocate_ftrace_func_mapper();
8161 		if (!mapper)
8162 			return -ENOMEM;
8163 		*data = mapper;
8164 	}
8165 
8166 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8167 }
8168 
8169 static void
8170 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8171 		     unsigned long ip, void *data)
8172 {
8173 	struct ftrace_func_mapper *mapper = data;
8174 
8175 	if (!ip) {
8176 		if (!mapper)
8177 			return;
8178 		free_ftrace_func_mapper(mapper, NULL);
8179 		return;
8180 	}
8181 
8182 	ftrace_func_mapper_remove_ip(mapper, ip);
8183 }
8184 
8185 static struct ftrace_probe_ops snapshot_probe_ops = {
8186 	.func			= ftrace_snapshot,
8187 	.print			= ftrace_snapshot_print,
8188 };
8189 
8190 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8191 	.func			= ftrace_count_snapshot,
8192 	.print			= ftrace_snapshot_print,
8193 	.init			= ftrace_snapshot_init,
8194 	.free			= ftrace_snapshot_free,
8195 };
8196 
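/*
 * Handles the "snapshot" command of set_ftrace_filter, e.g.
 *
 *	echo 'some_function:snapshot:5' > set_ftrace_filter
 *
 * takes a snapshot on each of the first five hits of some_function
 * (the function name above is only an example).
 */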
8197 static int
8198 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8199 			       char *glob, char *cmd, char *param, int enable)
8200 {
8201 	struct ftrace_probe_ops *ops;
8202 	void *count = (void *)-1;
8203 	char *number;
8204 	int ret;
8205 
8206 	if (!tr)
8207 		return -ENODEV;
8208 
8209 	/* hash funcs only work with set_ftrace_filter */
8210 	if (!enable)
8211 		return -EINVAL;
8212 
8213 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8214 
8215 	if (glob[0] == '!')
8216 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8217 
8218 	if (!param)
8219 		goto out_reg;
8220 
8221 	number = strsep(&param, ":");
8222 
8223 	if (!strlen(number))
8224 		goto out_reg;
8225 
8226 	/*
8227 	 * We use the callback data field (which is a pointer)
8228 	 * as our counter.
8229 	 */
8230 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8231 	if (ret)
8232 		return ret;
8233 
8234  out_reg:
8235 	ret = tracing_alloc_snapshot_instance(tr);
8236 	if (ret < 0)
8237 		goto out;
8238 
8239 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8240 
8241  out:
8242 	return ret < 0 ? ret : 0;
8243 }
8244 
8245 static struct ftrace_func_command ftrace_snapshot_cmd = {
8246 	.name			= "snapshot",
8247 	.func			= ftrace_trace_snapshot_callback,
8248 };
8249 
8250 static __init int register_snapshot_cmd(void)
8251 {
8252 	return register_ftrace_command(&ftrace_snapshot_cmd);
8253 }
8254 #else
8255 static inline __init int register_snapshot_cmd(void) { return 0; }
8256 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8257 
8258 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8259 {
8260 	if (WARN_ON(!tr->dir))
8261 		return ERR_PTR(-ENODEV);
8262 
8263 	/* Top directory uses NULL as the parent */
8264 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8265 		return NULL;
8266 
8267 	/* All sub buffers have a descriptor */
8268 	return tr->dir;
8269 }
8270 
8271 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8272 {
8273 	struct dentry *d_tracer;
8274 
8275 	if (tr->percpu_dir)
8276 		return tr->percpu_dir;
8277 
8278 	d_tracer = tracing_get_dentry(tr);
8279 	if (IS_ERR(d_tracer))
8280 		return NULL;
8281 
8282 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8283 
8284 	MEM_FAIL(!tr->percpu_dir,
8285 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8286 
8287 	return tr->percpu_dir;
8288 }
8289 
8290 static struct dentry *
8291 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8292 		      void *data, long cpu, const struct file_operations *fops)
8293 {
8294 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8295 
8296 	if (ret) /* See tracing_get_cpu() */
8297 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8298 	return ret;
8299 }
8300 
8301 static void
8302 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8303 {
8304 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8305 	struct dentry *d_cpu;
8306 	char cpu_dir[30]; /* 30 characters should be more than enough */
8307 
8308 	if (!d_percpu)
8309 		return;
8310 
8311 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8312 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8313 	if (!d_cpu) {
8314 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8315 		return;
8316 	}
8317 
8318 	/* per cpu trace_pipe */
8319 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8320 				tr, cpu, &tracing_pipe_fops);
8321 
8322 	/* per cpu trace */
8323 	trace_create_cpu_file("trace", 0644, d_cpu,
8324 				tr, cpu, &tracing_fops);
8325 
8326 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8327 				tr, cpu, &tracing_buffers_fops);
8328 
8329 	trace_create_cpu_file("stats", 0444, d_cpu,
8330 				tr, cpu, &tracing_stats_fops);
8331 
8332 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8333 				tr, cpu, &tracing_entries_fops);
8334 
8335 #ifdef CONFIG_TRACER_SNAPSHOT
8336 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8337 				tr, cpu, &snapshot_fops);
8338 
8339 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8340 				tr, cpu, &snapshot_raw_fops);
8341 #endif
8342 }
8343 
8344 #ifdef CONFIG_FTRACE_SELFTEST
8345 /* Let selftest have access to static functions in this file */
8346 #include "trace_selftest.c"
8347 #endif
8348 
8349 static ssize_t
8350 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8351 			loff_t *ppos)
8352 {
8353 	struct trace_option_dentry *topt = filp->private_data;
8354 	char *buf;
8355 
8356 	if (topt->flags->val & topt->opt->bit)
8357 		buf = "1\n";
8358 	else
8359 		buf = "0\n";
8360 
8361 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8362 }
8363 
8364 static ssize_t
8365 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8366 			 loff_t *ppos)
8367 {
8368 	struct trace_option_dentry *topt = filp->private_data;
8369 	unsigned long val;
8370 	int ret;
8371 
8372 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8373 	if (ret)
8374 		return ret;
8375 
8376 	if (val != 0 && val != 1)
8377 		return -EINVAL;
8378 
8379 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8380 		mutex_lock(&trace_types_lock);
8381 		ret = __set_tracer_option(topt->tr, topt->flags,
8382 					  topt->opt, !val);
8383 		mutex_unlock(&trace_types_lock);
8384 		if (ret)
8385 			return ret;
8386 	}
8387 
8388 	*ppos += cnt;
8389 
8390 	return cnt;
8391 }
8392 
8393 static int tracing_open_options(struct inode *inode, struct file *filp)
8394 {
8395 	struct trace_option_dentry *topt = inode->i_private;
8396 	int ret;
8397 
8398 	ret = tracing_check_open_get_tr(topt->tr);
8399 	if (ret)
8400 		return ret;
8401 
8402 	filp->private_data = inode->i_private;
8403 	return 0;
8404 }
8405 
8406 static int tracing_release_options(struct inode *inode, struct file *file)
8407 {
8408 	struct trace_option_dentry *topt = file->private_data;
8409 
8410 	trace_array_put(topt->tr);
8411 	return 0;
8412 }
8413 
8414 static const struct file_operations trace_options_fops = {
8415 	.open = tracing_open_options,
8416 	.read = trace_options_read,
8417 	.write = trace_options_write,
8418 	.llseek	= generic_file_llseek,
8419 	.release = tracing_release_options,
8420 };
8421 
8422 /*
8423  * In order to pass in both the trace_array descriptor as well as the index
8424  * to the flag that the trace option file represents, the trace_array
8425  * has a character array of trace_flags_index[], which holds the index
8426  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8427  * The address of this character array is passed to the flag option file
8428  * read/write callbacks.
8429  *
8430  * In order to extract both the index and the trace_array descriptor,
8431  * get_tr_index() uses the following algorithm.
8432  *
8433  *   idx = *ptr;
8434  *
8435  * Dereferencing the pointer yields the index, because each element of
8436  * the array holds its own position (remember index[1] == 1).
8437  *
8438  * Then, to get the trace_array descriptor, subtract that index from the
8439  * pointer to arrive at the start of the index array itself.
8440  *
8441  *   ptr - idx == &index[0]
8442  *
8443  * Then a simple container_of() from that pointer gets us to the
8444  * trace_array descriptor.
8445  */
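/*
 * A concrete (hypothetical) walk-through: if data points at
 * tr->trace_flags_index[5], then *data == 5, data - 5 points back at
 * trace_flags_index[0], and container_of() recovers the enclosing
 * trace_array.
 */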
8446 static void get_tr_index(void *data, struct trace_array **ptr,
8447 			 unsigned int *pindex)
8448 {
8449 	*pindex = *(unsigned char *)data;
8450 
8451 	*ptr = container_of(data - *pindex, struct trace_array,
8452 			    trace_flags_index);
8453 }
8454 
8455 static ssize_t
8456 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8457 			loff_t *ppos)
8458 {
8459 	void *tr_index = filp->private_data;
8460 	struct trace_array *tr;
8461 	unsigned int index;
8462 	char *buf;
8463 
8464 	get_tr_index(tr_index, &tr, &index);
8465 
8466 	if (tr->trace_flags & (1 << index))
8467 		buf = "1\n";
8468 	else
8469 		buf = "0\n";
8470 
8471 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8472 }
8473 
8474 static ssize_t
8475 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8476 			 loff_t *ppos)
8477 {
8478 	void *tr_index = filp->private_data;
8479 	struct trace_array *tr;
8480 	unsigned int index;
8481 	unsigned long val;
8482 	int ret;
8483 
8484 	get_tr_index(tr_index, &tr, &index);
8485 
8486 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8487 	if (ret)
8488 		return ret;
8489 
8490 	if (val != 0 && val != 1)
8491 		return -EINVAL;
8492 
8493 	mutex_lock(&event_mutex);
8494 	mutex_lock(&trace_types_lock);
8495 	ret = set_tracer_flag(tr, 1 << index, val);
8496 	mutex_unlock(&trace_types_lock);
8497 	mutex_unlock(&event_mutex);
8498 
8499 	if (ret < 0)
8500 		return ret;
8501 
8502 	*ppos += cnt;
8503 
8504 	return cnt;
8505 }
8506 
8507 static const struct file_operations trace_options_core_fops = {
8508 	.open = tracing_open_generic,
8509 	.read = trace_options_core_read,
8510 	.write = trace_options_core_write,
8511 	.llseek = generic_file_llseek,
8512 };
8513 
8514 struct dentry *trace_create_file(const char *name,
8515 				 umode_t mode,
8516 				 struct dentry *parent,
8517 				 void *data,
8518 				 const struct file_operations *fops)
8519 {
8520 	struct dentry *ret;
8521 
8522 	ret = tracefs_create_file(name, mode, parent, data, fops);
8523 	if (!ret)
8524 		pr_warn("Could not create tracefs '%s' entry\n", name);
8525 
8526 	return ret;
8527 }
8528 
8529 
8530 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8531 {
8532 	struct dentry *d_tracer;
8533 
8534 	if (tr->options)
8535 		return tr->options;
8536 
8537 	d_tracer = tracing_get_dentry(tr);
8538 	if (IS_ERR(d_tracer))
8539 		return NULL;
8540 
8541 	tr->options = tracefs_create_dir("options", d_tracer);
8542 	if (!tr->options) {
8543 		pr_warn("Could not create tracefs directory 'options'\n");
8544 		return NULL;
8545 	}
8546 
8547 	return tr->options;
8548 }
8549 
8550 static void
8551 create_trace_option_file(struct trace_array *tr,
8552 			 struct trace_option_dentry *topt,
8553 			 struct tracer_flags *flags,
8554 			 struct tracer_opt *opt)
8555 {
8556 	struct dentry *t_options;
8557 
8558 	t_options = trace_options_init_dentry(tr);
8559 	if (!t_options)
8560 		return;
8561 
8562 	topt->flags = flags;
8563 	topt->opt = opt;
8564 	topt->tr = tr;
8565 
8566 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8567 				    &trace_options_fops);
8568 
8569 }
8570 
8571 static void
8572 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8573 {
8574 	struct trace_option_dentry *topts;
8575 	struct trace_options *tr_topts;
8576 	struct tracer_flags *flags;
8577 	struct tracer_opt *opts;
8578 	int cnt;
8579 	int i;
8580 
8581 	if (!tracer)
8582 		return;
8583 
8584 	flags = tracer->flags;
8585 
8586 	if (!flags || !flags->opts)
8587 		return;
8588 
8589 	/*
8590 	 * If this is an instance, only create flags for tracers
8591 	 * the instance may have.
8592 	 */
8593 	if (!trace_ok_for_array(tracer, tr))
8594 		return;
8595 
8596 	for (i = 0; i < tr->nr_topts; i++) {
8597 		/* Make sure there are no duplicate flags. */
8598 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8599 			return;
8600 	}
8601 
8602 	opts = flags->opts;
8603 
8604 	for (cnt = 0; opts[cnt].name; cnt++)
8605 		;
8606 
8607 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8608 	if (!topts)
8609 		return;
8610 
8611 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8612 			    GFP_KERNEL);
8613 	if (!tr_topts) {
8614 		kfree(topts);
8615 		return;
8616 	}
8617 
8618 	tr->topts = tr_topts;
8619 	tr->topts[tr->nr_topts].tracer = tracer;
8620 	tr->topts[tr->nr_topts].topts = topts;
8621 	tr->nr_topts++;
8622 
8623 	for (cnt = 0; opts[cnt].name; cnt++) {
8624 		create_trace_option_file(tr, &topts[cnt], flags,
8625 					 &opts[cnt]);
8626 		MEM_FAIL(topts[cnt].entry == NULL,
8627 			  "Failed to create trace option: %s",
8628 			  opts[cnt].name);
8629 	}
8630 }
8631 
8632 static struct dentry *
8633 create_trace_option_core_file(struct trace_array *tr,
8634 			      const char *option, long index)
8635 {
8636 	struct dentry *t_options;
8637 
8638 	t_options = trace_options_init_dentry(tr);
8639 	if (!t_options)
8640 		return NULL;
8641 
8642 	return trace_create_file(option, 0644, t_options,
8643 				 (void *)&tr->trace_flags_index[index],
8644 				 &trace_options_core_fops);
8645 }
8646 
8647 static void create_trace_options_dir(struct trace_array *tr)
8648 {
8649 	struct dentry *t_options;
8650 	bool top_level = tr == &global_trace;
8651 	int i;
8652 
8653 	t_options = trace_options_init_dentry(tr);
8654 	if (!t_options)
8655 		return;
8656 
8657 	for (i = 0; trace_options[i]; i++) {
8658 		if (top_level ||
8659 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8660 			create_trace_option_core_file(tr, trace_options[i], i);
8661 	}
8662 }
8663 
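/*
 * rb_simple_read()/rb_simple_write() back the per-instance "tracing_on"
 * control file: writing 0 stops recording into the ring buffer (and calls
 * the tracer's stop callback), writing 1 turns recording back on.
 */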
8664 static ssize_t
8665 rb_simple_read(struct file *filp, char __user *ubuf,
8666 	       size_t cnt, loff_t *ppos)
8667 {
8668 	struct trace_array *tr = filp->private_data;
8669 	char buf[64];
8670 	int r;
8671 
8672 	r = tracer_tracing_is_on(tr);
8673 	r = sprintf(buf, "%d\n", r);
8674 
8675 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8676 }
8677 
8678 static ssize_t
8679 rb_simple_write(struct file *filp, const char __user *ubuf,
8680 		size_t cnt, loff_t *ppos)
8681 {
8682 	struct trace_array *tr = filp->private_data;
8683 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8684 	unsigned long val;
8685 	int ret;
8686 
8687 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8688 	if (ret)
8689 		return ret;
8690 
8691 	if (buffer) {
8692 		mutex_lock(&trace_types_lock);
8693 		if (!!val == tracer_tracing_is_on(tr)) {
8694 			val = 0; /* do nothing */
8695 		} else if (val) {
8696 			tracer_tracing_on(tr);
8697 			if (tr->current_trace->start)
8698 				tr->current_trace->start(tr);
8699 		} else {
8700 			tracer_tracing_off(tr);
8701 			if (tr->current_trace->stop)
8702 				tr->current_trace->stop(tr);
8703 		}
8704 		mutex_unlock(&trace_types_lock);
8705 	}
8706 
8707 	(*ppos)++;
8708 
8709 	return cnt;
8710 }
8711 
8712 static const struct file_operations rb_simple_fops = {
8713 	.open		= tracing_open_generic_tr,
8714 	.read		= rb_simple_read,
8715 	.write		= rb_simple_write,
8716 	.release	= tracing_release_generic_tr,
8717 	.llseek		= default_llseek,
8718 };
8719 
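/*
 * buffer_percent controls how full a per-cpu ring buffer must be before
 * readers waiting on it (e.g. in the splice path above) are woken up;
 * 0 means any data at all wakes them.
 */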
8720 static ssize_t
8721 buffer_percent_read(struct file *filp, char __user *ubuf,
8722 		    size_t cnt, loff_t *ppos)
8723 {
8724 	struct trace_array *tr = filp->private_data;
8725 	char buf[64];
8726 	int r;
8727 
8728 	r = tr->buffer_percent;
8729 	r = sprintf(buf, "%d\n", r);
8730 
8731 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8732 }
8733 
8734 static ssize_t
8735 buffer_percent_write(struct file *filp, const char __user *ubuf,
8736 		     size_t cnt, loff_t *ppos)
8737 {
8738 	struct trace_array *tr = filp->private_data;
8739 	unsigned long val;
8740 	int ret;
8741 
8742 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8743 	if (ret)
8744 		return ret;
8745 
8746 	if (val > 100)
8747 		return -EINVAL;
8748 
8749 	tr->buffer_percent = val;
8750 
8751 	(*ppos)++;
8752 
8753 	return cnt;
8754 }
8755 
8756 static const struct file_operations buffer_percent_fops = {
8757 	.open		= tracing_open_generic_tr,
8758 	.read		= buffer_percent_read,
8759 	.write		= buffer_percent_write,
8760 	.release	= tracing_release_generic_tr,
8761 	.llseek		= default_llseek,
8762 };
8763 
8764 static struct dentry *trace_instance_dir;
8765 
8766 static void
8767 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8768 
8769 static int
8770 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8771 {
8772 	enum ring_buffer_flags rb_flags;
8773 
8774 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8775 
8776 	buf->tr = tr;
8777 
8778 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8779 	if (!buf->buffer)
8780 		return -ENOMEM;
8781 
8782 	buf->data = alloc_percpu(struct trace_array_cpu);
8783 	if (!buf->data) {
8784 		ring_buffer_free(buf->buffer);
8785 		buf->buffer = NULL;
8786 		return -ENOMEM;
8787 	}
8788 
8789 	/* Allocate the first page for all buffers */
8790 	set_buffer_entries(&tr->array_buffer,
8791 			   ring_buffer_size(tr->array_buffer.buffer, 0));
8792 
8793 	return 0;
8794 }
8795 
8796 static int allocate_trace_buffers(struct trace_array *tr, int size)
8797 {
8798 	int ret;
8799 
8800 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8801 	if (ret)
8802 		return ret;
8803 
8804 #ifdef CONFIG_TRACER_MAX_TRACE
8805 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8806 				    allocate_snapshot ? size : 1);
8807 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8808 		ring_buffer_free(tr->array_buffer.buffer);
8809 		tr->array_buffer.buffer = NULL;
8810 		free_percpu(tr->array_buffer.data);
8811 		tr->array_buffer.data = NULL;
8812 		return -ENOMEM;
8813 	}
8814 	tr->allocated_snapshot = allocate_snapshot;
8815 
8816 	/*
8817 	 * Only the top level trace array gets its snapshot allocated
8818 	 * from the kernel command line.
8819 	 */
8820 	allocate_snapshot = false;
8821 #endif
8822 
8823 	return 0;
8824 }
8825 
8826 static void free_trace_buffer(struct array_buffer *buf)
8827 {
8828 	if (buf->buffer) {
8829 		ring_buffer_free(buf->buffer);
8830 		buf->buffer = NULL;
8831 		free_percpu(buf->data);
8832 		buf->data = NULL;
8833 	}
8834 }
8835 
8836 static void free_trace_buffers(struct trace_array *tr)
8837 {
8838 	if (!tr)
8839 		return;
8840 
8841 	free_trace_buffer(&tr->array_buffer);
8842 
8843 #ifdef CONFIG_TRACER_MAX_TRACE
8844 	free_trace_buffer(&tr->max_buffer);
8845 #endif
8846 }
8847 
8848 static void init_trace_flags_index(struct trace_array *tr)
8849 {
8850 	int i;
8851 
8852 	/* Used by the trace options files */
8853 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8854 		tr->trace_flags_index[i] = i;
8855 }
8856 
8857 static void __update_tracer_options(struct trace_array *tr)
8858 {
8859 	struct tracer *t;
8860 
8861 	for (t = trace_types; t; t = t->next)
8862 		add_tracer_options(tr, t);
8863 }
8864 
8865 static void update_tracer_options(struct trace_array *tr)
8866 {
8867 	mutex_lock(&trace_types_lock);
8868 	tracer_options_updated = true;
8869 	__update_tracer_options(tr);
8870 	mutex_unlock(&trace_types_lock);
8871 }
8872 
8873 /* Must have trace_types_lock held */
8874 struct trace_array *trace_array_find(const char *instance)
8875 {
8876 	struct trace_array *tr, *found = NULL;
8877 
8878 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8879 		if (tr->name && strcmp(tr->name, instance) == 0) {
8880 			found = tr;
8881 			break;
8882 		}
8883 	}
8884 
8885 	return found;
8886 }
8887 
8888 struct trace_array *trace_array_find_get(const char *instance)
8889 {
8890 	struct trace_array *tr;
8891 
8892 	mutex_lock(&trace_types_lock);
8893 	tr = trace_array_find(instance);
8894 	if (tr)
8895 		tr->ref++;
8896 	mutex_unlock(&trace_types_lock);
8897 
8898 	return tr;
8899 }
8900 
8901 static int trace_array_create_dir(struct trace_array *tr)
8902 {
8903 	int ret;
8904 
8905 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8906 	if (!tr->dir)
8907 		return -EINVAL;
8908 
8909 	ret = event_trace_add_tracer(tr->dir, tr);
8910 	if (ret) {
8911 		tracefs_remove(tr->dir);
8912 		return ret;
8913 	}
8914 
8915 	init_tracer_tracefs(tr, tr->dir);
8916 	__update_tracer_options(tr);
8917 
8918 	return ret;
8919 }
8920 
8921 static struct trace_array *trace_array_create(const char *name)
8922 {
8923 	struct trace_array *tr;
8924 	int ret;
8925 
8926 	ret = -ENOMEM;
8927 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8928 	if (!tr)
8929 		return ERR_PTR(ret);
8930 
8931 	tr->name = kstrdup(name, GFP_KERNEL);
8932 	if (!tr->name)
8933 		goto out_free_tr;
8934 
8935 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8936 		goto out_free_tr;
8937 
8938 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
8939 		goto out_free_tr;
8940 
8941 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8942 
8943 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8944 
8945 	raw_spin_lock_init(&tr->start_lock);
8946 
8947 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8948 
8949 	tr->current_trace = &nop_trace;
8950 
8951 	INIT_LIST_HEAD(&tr->systems);
8952 	INIT_LIST_HEAD(&tr->events);
8953 	INIT_LIST_HEAD(&tr->hist_vars);
8954 	INIT_LIST_HEAD(&tr->err_log);
8955 
8956 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8957 		goto out_free_tr;
8958 
8959 	if (ftrace_allocate_ftrace_ops(tr) < 0)
8960 		goto out_free_tr;
8961 
8962 	ftrace_init_trace_array(tr);
8963 
8964 	init_trace_flags_index(tr);
8965 
8966 	if (trace_instance_dir) {
8967 		ret = trace_array_create_dir(tr);
8968 		if (ret)
8969 			goto out_free_tr;
8970 	} else
8971 		__trace_early_add_events(tr);
8972 
8973 	list_add(&tr->list, &ftrace_trace_arrays);
8974 
8975 	tr->ref++;
8976 
8977 	return tr;
8978 
8979  out_free_tr:
8980 	ftrace_free_ftrace_ops(tr);
8981 	free_trace_buffers(tr);
8982 	free_cpumask_var(tr->pipe_cpumask);
8983 	free_cpumask_var(tr->tracing_cpumask);
8984 	kfree(tr->name);
8985 	kfree(tr);
8986 
8987 	return ERR_PTR(ret);
8988 }
8989 
8990 static int instance_mkdir(const char *name)
8991 {
8992 	struct trace_array *tr;
8993 	int ret;
8994 
8995 	mutex_lock(&event_mutex);
8996 	mutex_lock(&trace_types_lock);
8997 
8998 	ret = -EEXIST;
8999 	if (trace_array_find(name))
9000 		goto out_unlock;
9001 
9002 	tr = trace_array_create(name);
9003 
9004 	ret = PTR_ERR_OR_ZERO(tr);
9005 
9006 out_unlock:
9007 	mutex_unlock(&trace_types_lock);
9008 	mutex_unlock(&event_mutex);
9009 	return ret;
9010 }
9011 
9012 /**
9013  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9014  * @name: The name of the trace array to be looked up/created.
9015  *
9016  * Returns a pointer to the trace array with the given name, or
9017  * NULL if it cannot be created.
9018  *
9019  * NOTE: This function increments the reference counter associated with the
9020  * trace array returned. This makes sure it cannot be freed while in use.
9021  * Use trace_array_put() once the trace array is no longer needed.
9022  * If the trace_array is to be freed, trace_array_destroy() needs to
9023  * be called after trace_array_put(), or simply let user space delete
9024  * it from the tracefs instances directory. But until trace_array_put()
9025  * is called, user space cannot delete it.
9026  *
9027  */
9028 struct trace_array *trace_array_get_by_name(const char *name)
9029 {
9030 	struct trace_array *tr;
9031 
9032 	mutex_lock(&event_mutex);
9033 	mutex_lock(&trace_types_lock);
9034 
9035 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9036 		if (tr->name && strcmp(tr->name, name) == 0)
9037 			goto out_unlock;
9038 	}
9039 
9040 	tr = trace_array_create(name);
9041 
9042 	if (IS_ERR(tr))
9043 		tr = NULL;
9044 out_unlock:
9045 	if (tr)
9046 		tr->ref++;
9047 
9048 	mutex_unlock(&trace_types_lock);
9049 	mutex_unlock(&event_mutex);
9050 	return tr;
9051 }
9052 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
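/*
 * A minimal sketch of how a module might use this instance API, following
 * the rules in the comment above (header locations and the availability of
 * trace_array_set_clr_event() from the event code are assumed here; error
 * handling is trimmed to the essentials):
 *
 *	#include <linux/trace.h>
 *	#include <linux/trace_events.h>
 *
 *	static struct trace_array *my_tr;
 *
 *	static int my_trace_setup(void)
 *	{
 *		my_tr = trace_array_get_by_name("my_instance");
 *		if (!my_tr)
 *			return -ENOMEM;
 *		return trace_array_set_clr_event(my_tr, "sched",
 *						 "sched_switch", true);
 *	}
 *
 *	static void my_trace_teardown(void)
 *	{
 *		if (!my_tr)
 *			return;
 *		trace_array_put(my_tr);
 *		trace_array_destroy(my_tr);
 *	}
 */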
9053 
9054 static int __remove_instance(struct trace_array *tr)
9055 {
9056 	int i;
9057 
9058 	/* Reference counter for a newly created trace array = 1. */
9059 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9060 		return -EBUSY;
9061 
9062 	list_del(&tr->list);
9063 
9064 	/* Disable all the flags that were enabled coming in */
9065 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9066 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9067 			set_tracer_flag(tr, 1 << i, 0);
9068 	}
9069 
9070 	tracing_set_nop(tr);
9071 	clear_ftrace_function_probes(tr);
9072 	event_trace_del_tracer(tr);
9073 	ftrace_clear_pids(tr);
9074 	ftrace_destroy_function_files(tr);
9075 	tracefs_remove(tr->dir);
9076 	free_trace_buffers(tr);
9077 	clear_tracing_err_log(tr);
9078 
9079 	for (i = 0; i < tr->nr_topts; i++) {
9080 		kfree(tr->topts[i].topts);
9081 	}
9082 	kfree(tr->topts);
9083 
9084 	free_cpumask_var(tr->pipe_cpumask);
9085 	free_cpumask_var(tr->tracing_cpumask);
9086 	kfree(tr->name);
9087 	kfree(tr);
9088 
9089 	return 0;
9090 }
9091 
9092 int trace_array_destroy(struct trace_array *this_tr)
9093 {
9094 	struct trace_array *tr;
9095 	int ret;
9096 
9097 	if (!this_tr)
9098 		return -EINVAL;
9099 
9100 	mutex_lock(&event_mutex);
9101 	mutex_lock(&trace_types_lock);
9102 
9103 	ret = -ENODEV;
9104 
9105 	/* Make sure the trace array exists before destroying it. */
9106 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9107 		if (tr == this_tr) {
9108 			ret = __remove_instance(tr);
9109 			break;
9110 		}
9111 	}
9112 
9113 	mutex_unlock(&trace_types_lock);
9114 	mutex_unlock(&event_mutex);
9115 
9116 	return ret;
9117 }
9118 EXPORT_SYMBOL_GPL(trace_array_destroy);
9119 
9120 static int instance_rmdir(const char *name)
9121 {
9122 	struct trace_array *tr;
9123 	int ret;
9124 
9125 	mutex_lock(&event_mutex);
9126 	mutex_lock(&trace_types_lock);
9127 
9128 	ret = -ENODEV;
9129 	tr = trace_array_find(name);
9130 	if (tr)
9131 		ret = __remove_instance(tr);
9132 
9133 	mutex_unlock(&trace_types_lock);
9134 	mutex_unlock(&event_mutex);
9135 
9136 	return ret;
9137 }
9138 
9139 static __init void create_trace_instances(struct dentry *d_tracer)
9140 {
9141 	struct trace_array *tr;
9142 
9143 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9144 							 instance_mkdir,
9145 							 instance_rmdir);
9146 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9147 		return;
9148 
9149 	mutex_lock(&event_mutex);
9150 	mutex_lock(&trace_types_lock);
9151 
9152 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9153 		if (!tr->name)
9154 			continue;
9155 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9156 			     "Failed to create instance directory\n"))
9157 			break;
9158 	}
9159 
9160 	mutex_unlock(&trace_types_lock);
9161 	mutex_unlock(&event_mutex);
9162 }
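/*
 * The instance_mkdir()/instance_rmdir() callbacks registered above are what
 * run when user space creates or removes a directory under instances/.  A
 * small sketch, assuming the default tracefs mount point:
 *
 *	#include <sys/stat.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		const char *inst = "/sys/kernel/tracing/instances/foo";
 *
 *		if (mkdir(inst, 0755))
 *			return 1;
 *		return rmdir(inst);
 *	}
 *
 * The mkdir() ends up in trace_array_create() and the rmdir() in
 * __remove_instance(), which refuses with -EBUSY while the instance is
 * still referenced.
 */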
9163 
9164 static void
9165 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9166 {
9167 	struct trace_event_file *file;
9168 	int cpu;
9169 
9170 	trace_create_file("available_tracers", 0444, d_tracer,
9171 			tr, &show_traces_fops);
9172 
9173 	trace_create_file("current_tracer", 0644, d_tracer,
9174 			tr, &set_tracer_fops);
9175 
9176 	trace_create_file("tracing_cpumask", 0644, d_tracer,
9177 			  tr, &tracing_cpumask_fops);
9178 
9179 	trace_create_file("trace_options", 0644, d_tracer,
9180 			  tr, &tracing_iter_fops);
9181 
9182 	trace_create_file("trace", 0644, d_tracer,
9183 			  tr, &tracing_fops);
9184 
9185 	trace_create_file("trace_pipe", 0444, d_tracer,
9186 			  tr, &tracing_pipe_fops);
9187 
9188 	trace_create_file("buffer_size_kb", 0644, d_tracer,
9189 			  tr, &tracing_entries_fops);
9190 
9191 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9192 			  tr, &tracing_total_entries_fops);
9193 
9194 	trace_create_file("free_buffer", 0200, d_tracer,
9195 			  tr, &tracing_free_buffer_fops);
9196 
9197 	trace_create_file("trace_marker", 0220, d_tracer,
9198 			  tr, &tracing_mark_fops);
9199 
9200 	file = __find_event_file(tr, "ftrace", "print");
9201 	if (file && file->dir)
9202 		trace_create_file("trigger", 0644, file->dir, file,
9203 				  &event_trigger_fops);
9204 	tr->trace_marker_file = file;
9205 
9206 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9207 			  tr, &tracing_mark_raw_fops);
9208 
9209 	trace_create_file("trace_clock", 0644, d_tracer, tr,
9210 			  &trace_clock_fops);
9211 
9212 	trace_create_file("tracing_on", 0644, d_tracer,
9213 			  tr, &rb_simple_fops);
9214 
9215 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9216 			  &trace_time_stamp_mode_fops);
9217 
9218 	tr->buffer_percent = 50;
9219 
9220 	trace_create_file("buffer_percent", 0444, d_tracer,
9221 			tr, &buffer_percent_fops);
9222 
9223 	create_trace_options_dir(tr);
9224 
9225 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9226 	trace_create_maxlat_file(tr, d_tracer);
9227 #endif
9228 
9229 	if (ftrace_create_function_files(tr, d_tracer))
9230 		MEM_FAIL(1, "Could not allocate function filter files");
9231 
9232 #ifdef CONFIG_TRACER_SNAPSHOT
9233 	trace_create_file("snapshot", 0644, d_tracer,
9234 			  tr, &snapshot_fops);
9235 #endif
9236 
9237 	trace_create_file("error_log", 0644, d_tracer,
9238 			  tr, &tracing_err_log_fops);
9239 
9240 	for_each_tracing_cpu(cpu)
9241 		tracing_init_tracefs_percpu(tr, cpu);
9242 
9243 	ftrace_init_tracefs(tr, d_tracer);
9244 }
9245 
9246 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9247 {
9248 	struct vfsmount *mnt;
9249 	struct file_system_type *type;
9250 
9251 	/*
9252 	 * To maintain backward compatibility for tools that mount
9253 	 * debugfs to get to the tracing facility, tracefs is automatically
9254 	 * mounted to the debugfs/tracing directory.
9255 	 */
9256 	type = get_fs_type("tracefs");
9257 	if (!type)
9258 		return NULL;
9259 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9260 	put_filesystem(type);
9261 	if (IS_ERR(mnt))
9262 		return NULL;
9263 	mntget(mnt);
9264 
9265 	return mnt;
9266 }
9267 
9268 /**
9269  * tracing_init_dentry - initialize top level trace array
9270  *
9271  * This is called when creating files or directories in the tracing
9272  * directory. It is called via fs_initcall() by any of the boot up code,
9273  * and returns 0 on success or a negative error code.
9274  */
9275 int tracing_init_dentry(void)
9276 {
9277 	struct trace_array *tr = &global_trace;
9278 
9279 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9280 		pr_warn("Tracing disabled due to lockdown\n");
9281 		return -EPERM;
9282 	}
9283 
9284 	/* The top level trace array uses NULL as parent */
9285 	if (tr->dir)
9286 		return 0;
9287 
9288 	if (WARN_ON(!tracefs_initialized()))
9289 		return -ENODEV;
9290 
9291 	/*
9292 	 * As there may still be users that expect the tracing
9293 	 * files to exist in debugfs/tracing, we must automount
9294 	 * the tracefs file system there, so older tools still
9295 	 * work with the newer kernel.
9296 	 */
9297 	tr->dir = debugfs_create_automount("tracing", NULL,
9298 					   trace_automount, NULL);
9299 
9300 	return 0;
9301 }
9302 
9303 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9304 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9305 
9306 static void __init trace_eval_init(void)
9307 {
9308 	int len;
9309 
9310 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9311 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9312 }
9313 
9314 #ifdef CONFIG_MODULES
9315 static void trace_module_add_evals(struct module *mod)
9316 {
9317 	if (!mod->num_trace_evals)
9318 		return;
9319 
9320 	/*
9321 	 * Modules with bad taint do not have events created; do
9322 	 * not bother with enums either.
9323 	 */
9324 	if (trace_module_has_bad_taint(mod))
9325 		return;
9326 
9327 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9328 }
9329 
9330 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9331 static void trace_module_remove_evals(struct module *mod)
9332 {
9333 	union trace_eval_map_item *map;
9334 	union trace_eval_map_item **last = &trace_eval_maps;
9335 
9336 	if (!mod->num_trace_evals)
9337 		return;
9338 
9339 	mutex_lock(&trace_eval_mutex);
9340 
9341 	map = trace_eval_maps;
9342 
9343 	while (map) {
9344 		if (map->head.mod == mod)
9345 			break;
9346 		map = trace_eval_jmp_to_tail(map);
9347 		last = &map->tail.next;
9348 		map = map->tail.next;
9349 	}
9350 	if (!map)
9351 		goto out;
9352 
9353 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9354 	kfree(map);
9355  out:
9356 	mutex_unlock(&trace_eval_mutex);
9357 }
9358 #else
9359 static inline void trace_module_remove_evals(struct module *mod) { }
9360 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9361 
9362 static int trace_module_notify(struct notifier_block *self,
9363 			       unsigned long val, void *data)
9364 {
9365 	struct module *mod = data;
9366 
9367 	switch (val) {
9368 	case MODULE_STATE_COMING:
9369 		trace_module_add_evals(mod);
9370 		break;
9371 	case MODULE_STATE_GOING:
9372 		trace_module_remove_evals(mod);
9373 		break;
9374 	}
9375 
9376 	return NOTIFY_OK;
9377 }
9378 
9379 static struct notifier_block trace_module_nb = {
9380 	.notifier_call = trace_module_notify,
9381 	.priority = 0,
9382 };
9383 #endif /* CONFIG_MODULES */
9384 
9385 static __init int tracer_init_tracefs(void)
9386 {
9387 	int ret;
9388 
9389 	trace_access_lock_init();
9390 
9391 	ret = tracing_init_dentry();
9392 	if (ret)
9393 		return 0;
9394 
9395 	event_trace_init();
9396 
9397 	init_tracer_tracefs(&global_trace, NULL);
9398 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9399 
9400 	trace_create_file("tracing_thresh", 0644, NULL,
9401 			&global_trace, &tracing_thresh_fops);
9402 
9403 	trace_create_file("README", 0444, NULL,
9404 			NULL, &tracing_readme_fops);
9405 
9406 	trace_create_file("saved_cmdlines", 0444, NULL,
9407 			NULL, &tracing_saved_cmdlines_fops);
9408 
9409 	trace_create_file("saved_cmdlines_size", 0644, NULL,
9410 			  NULL, &tracing_saved_cmdlines_size_fops);
9411 
9412 	trace_create_file("saved_tgids", 0444, NULL,
9413 			NULL, &tracing_saved_tgids_fops);
9414 
9415 	trace_eval_init();
9416 
9417 	trace_create_eval_file(NULL);
9418 
9419 #ifdef CONFIG_MODULES
9420 	register_module_notifier(&trace_module_nb);
9421 #endif
9422 
9423 #ifdef CONFIG_DYNAMIC_FTRACE
9424 	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9425 			NULL, &tracing_dyn_info_fops);
9426 #endif
9427 
9428 	create_trace_instances(NULL);
9429 
9430 	update_tracer_options(&global_trace);
9431 
9432 	return 0;
9433 }
9434 
9435 static int trace_panic_handler(struct notifier_block *this,
9436 			       unsigned long event, void *unused)
9437 {
9438 	if (ftrace_dump_on_oops)
9439 		ftrace_dump(ftrace_dump_on_oops);
9440 	return NOTIFY_OK;
9441 }
9442 
9443 static struct notifier_block trace_panic_notifier = {
9444 	.notifier_call  = trace_panic_handler,
9445 	.next           = NULL,
9446 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9447 };
9448 
9449 static int trace_die_handler(struct notifier_block *self,
9450 			     unsigned long val,
9451 			     void *data)
9452 {
9453 	switch (val) {
9454 	case DIE_OOPS:
9455 		if (ftrace_dump_on_oops)
9456 			ftrace_dump(ftrace_dump_on_oops);
9457 		break;
9458 	default:
9459 		break;
9460 	}
9461 	return NOTIFY_OK;
9462 }
9463 
9464 static struct notifier_block trace_die_notifier = {
9465 	.notifier_call = trace_die_handler,
9466 	.priority = 200
9467 };
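/*
 * Both notifiers only act when ftrace_dump_on_oops is non-zero.  That is
 * normally set with the "ftrace_dump_on_oops" boot parameter, but it is
 * also exposed as a sysctl; a user-space sketch (assuming the usual proc
 * path, with 1 mapping to DUMP_ALL and 2 to DUMP_ORIG):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int enable_dump_on_oops(void)
 *	{
 *		int fd = open("/proc/sys/kernel/ftrace_dump_on_oops", O_WRONLY);
 *		int ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, "1", 1) == 1 ? 0 : -1;
 *		close(fd);
 *		return ret;
 *	}
 */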
9468 
9469 /*
9470  * printk is set to a max of 1024; we really don't need it that big.
9471  * Nothing should be printing 1000 characters anyway.
9472  */
9473 #define TRACE_MAX_PRINT		1000
9474 
9475 /*
9476  * Define here KERN_TRACE so that we have one place to modify
9477  * it if we decide to change what log level the ftrace dump
9478  * should be at.
9479  */
9480 #define KERN_TRACE		KERN_EMERG
9481 
9482 void
9483 trace_printk_seq(struct trace_seq *s)
9484 {
9485 	/* Probably should print a warning here. */
9486 	if (s->seq.len >= TRACE_MAX_PRINT)
9487 		s->seq.len = TRACE_MAX_PRINT;
9488 
9489 	/*
9490 	 * More paranoid code. Although the buffer size is set to
9491 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9492 	 * an extra layer of protection.
9493 	 */
9494 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9495 		s->seq.len = s->seq.size - 1;
9496 
9497 	/* Should be zero terminated, but we are paranoid. */
9498 	s->buffer[s->seq.len] = 0;
9499 
9500 	printk(KERN_TRACE "%s", s->buffer);
9501 
9502 	trace_seq_init(s);
9503 }
9504 
9505 void trace_init_global_iter(struct trace_iterator *iter)
9506 {
9507 	iter->tr = &global_trace;
9508 	iter->trace = iter->tr->current_trace;
9509 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9510 	iter->array_buffer = &global_trace.array_buffer;
9511 
9512 	if (iter->trace && iter->trace->open)
9513 		iter->trace->open(iter);
9514 
9515 	/* Annotate start of buffers if we had overruns */
9516 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9517 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9518 
9519 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9520 	if (trace_clocks[iter->tr->clock_id].in_ns)
9521 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9522 
9523 	/* Cannot use kmalloc for iter.temp and iter.fmt */
9524 	iter->temp = static_temp_buf;
9525 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
9526 	iter->fmt = static_fmt_buf;
9527 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
9528 }
9529 
9530 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9531 {
9532 	/* use static because iter can be a bit big for the stack */
9533 	static struct trace_iterator iter;
9534 	static atomic_t dump_running;
9535 	struct trace_array *tr = &global_trace;
9536 	unsigned int old_userobj;
9537 	unsigned long flags;
9538 	int cnt = 0, cpu;
9539 
9540 	/* Only allow one dump user at a time. */
9541 	if (atomic_inc_return(&dump_running) != 1) {
9542 		atomic_dec(&dump_running);
9543 		return;
9544 	}
9545 
9546 	/*
9547 	 * Always turn off tracing when we dump.
9548 	 * We don't need to show trace output of what happens
9549 	 * between multiple crashes.
9550 	 *
9551 	 * If the user does a sysrq-z, then they can re-enable
9552 	 * tracing with echo 1 > tracing_on.
9553 	 */
9554 	tracing_off();
9555 
9556 	local_irq_save(flags);
9557 	printk_nmi_direct_enter();
9558 
9559 	/* Simulate the iterator */
9560 	trace_init_global_iter(&iter);
9561 
9562 	for_each_tracing_cpu(cpu) {
9563 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9564 	}
9565 
9566 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9567 
9568 	/* don't look at user memory in panic mode */
9569 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9570 
9571 	switch (oops_dump_mode) {
9572 	case DUMP_ALL:
9573 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9574 		break;
9575 	case DUMP_ORIG:
9576 		iter.cpu_file = raw_smp_processor_id();
9577 		break;
9578 	case DUMP_NONE:
9579 		goto out_enable;
9580 	default:
9581 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9582 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9583 	}
9584 
9585 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9586 
9587 	/* Did function tracer already get disabled? */
9588 	if (ftrace_is_dead()) {
9589 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9590 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9591 	}
9592 
9593 	/*
9594 	 * We need to stop all tracing on all CPUs to read
9595 	 * the next buffer. This is a bit expensive, but is
9596 	 * not done often. We fill in all that we can read,
9597 	 * and then release the locks again.
9598 	 */
9599 
9600 	while (!trace_empty(&iter)) {
9601 
9602 		if (!cnt)
9603 			printk(KERN_TRACE "---------------------------------\n");
9604 
9605 		cnt++;
9606 
9607 		trace_iterator_reset(&iter);
9608 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9609 
9610 		if (trace_find_next_entry_inc(&iter) != NULL) {
9611 			int ret;
9612 
9613 			ret = print_trace_line(&iter);
9614 			if (ret != TRACE_TYPE_NO_CONSUME)
9615 				trace_consume(&iter);
9616 		}
9617 		touch_nmi_watchdog();
9618 
9619 		trace_printk_seq(&iter.seq);
9620 	}
9621 
9622 	if (!cnt)
9623 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9624 	else
9625 		printk(KERN_TRACE "---------------------------------\n");
9626 
9627  out_enable:
9628 	tr->trace_flags |= old_userobj;
9629 
9630 	for_each_tracing_cpu(cpu) {
9631 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9632 	}
9633 	atomic_dec(&dump_running);
9634 	printk_nmi_direct_exit();
9635 	local_irq_restore(flags);
9636 }
9637 EXPORT_SYMBOL_GPL(ftrace_dump);
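/*
 * Since ftrace_dump() is exported, a module can dump the trace buffer to the
 * console from its own error path.  A sketch (the declaration is assumed to
 * come in via the usual tracing headers; only one dump may run at a time and
 * tracing is left disabled afterwards):
 *
 *	#include <linux/kernel.h>
 *
 *	static void my_fatal_error(void)
 *	{
 *		pr_emerg("my_driver: fatal state, dumping the trace buffer\n");
 *		ftrace_dump(DUMP_ALL);
 *	}
 */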
9638 
9639 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9640 {
9641 	char **argv;
9642 	int argc, ret;
9643 
9644 	argc = 0;
9645 	ret = 0;
9646 	argv = argv_split(GFP_KERNEL, buf, &argc);
9647 	if (!argv)
9648 		return -ENOMEM;
9649 
9650 	if (argc)
9651 		ret = createfn(argc, argv);
9652 
9653 	argv_free(argv);
9654 
9655 	return ret;
9656 }
9657 
9658 #define WRITE_BUFSIZE  4096
9659 
9660 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9661 				size_t count, loff_t *ppos,
9662 				int (*createfn)(int, char **))
9663 {
9664 	char *kbuf, *buf, *tmp;
9665 	int ret = 0;
9666 	size_t done = 0;
9667 	size_t size;
9668 
9669 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9670 	if (!kbuf)
9671 		return -ENOMEM;
9672 
9673 	while (done < count) {
9674 		size = count - done;
9675 
9676 		if (size >= WRITE_BUFSIZE)
9677 			size = WRITE_BUFSIZE - 1;
9678 
9679 		if (copy_from_user(kbuf, buffer + done, size)) {
9680 			ret = -EFAULT;
9681 			goto out;
9682 		}
9683 		kbuf[size] = '\0';
9684 		buf = kbuf;
9685 		do {
9686 			tmp = strchr(buf, '\n');
9687 			if (tmp) {
9688 				*tmp = '\0';
9689 				size = tmp - buf + 1;
9690 			} else {
9691 				size = strlen(buf);
9692 				if (done + size < count) {
9693 					if (buf != kbuf)
9694 						break;
9695 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9696 					pr_warn("Line length is too long: Should be less than %d\n",
9697 						WRITE_BUFSIZE - 2);
9698 					ret = -EINVAL;
9699 					goto out;
9700 				}
9701 			}
9702 			done += size;
9703 
9704 			/* Remove comments */
9705 			tmp = strchr(buf, '#');
9706 
9707 			if (tmp)
9708 				*tmp = '\0';
9709 
9710 			ret = trace_run_command(buf, createfn);
9711 			if (ret)
9712 				goto out;
9713 			buf += size;
9714 
9715 		} while (done < count);
9716 	}
9717 	ret = done;
9718 
9719 out:
9720 	kfree(kbuf);
9721 
9722 	return ret;
9723 }
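/*
 * trace_parse_run_command() is the glue used by command-style tracefs files
 * in this directory (the probe-event code, for example): it copies the user
 * buffer in, strips '#' comments, splits each line into an argv[] and hands
 * it to the caller's createfn.  A hypothetical write handler wiring it up
 * (my_create_cmd and my_cmd_write are illustrative names, not part of this
 * file):
 *
 *	static int my_create_cmd(int argc, char **argv)
 *	{
 *		if (argc < 1)
 *			return -EINVAL;
 *		pr_info("cmd %s with %d argument(s)\n", argv[0], argc - 1);
 *		return 0;
 *	}
 *
 *	static ssize_t my_cmd_write(struct file *file, const char __user *ubuf,
 *				    size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, ubuf, count, ppos,
 *					       my_create_cmd);
 *	}
 */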
9724 
9725 __init static int tracer_alloc_buffers(void)
9726 {
9727 	int ring_buf_size;
9728 	int ret = -ENOMEM;
9729 
9730 
9731 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9732 		pr_warn("Tracing disabled due to lockdown\n");
9733 		return -EPERM;
9734 	}
9735 
9736 	/*
9737 	 * Make sure we don't accidentally add more trace options
9738 	 * than we have bits for.
9739 	 */
9740 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9741 
9742 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9743 		goto out;
9744 
9745 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9746 		goto out_free_buffer_mask;
9747 
9748 	/* Only allocate trace_printk buffers if a trace_printk exists */
9749 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9750 		/* Must be called before global_trace.buffer is allocated */
9751 		trace_printk_init_buffers();
9752 
9753 	/* To save memory, keep the ring buffer size to its minimum */
9754 	if (ring_buffer_expanded)
9755 		ring_buf_size = trace_buf_size;
9756 	else
9757 		ring_buf_size = 1;
9758 
9759 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9760 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9761 
9762 	raw_spin_lock_init(&global_trace.start_lock);
9763 
9764 	/*
9765 	 * The prepare callback allocates some memory for the ring buffer. We
9766 	 * don't free the buffer if the CPU goes down. If we were to free
9767 	 * the buffer, then the user would lose any trace that was in the
9768 	 * buffer. The memory will be removed once the "instance" is removed.
9769 	 */
9770 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9771 				      "trace/RB:preapre", trace_rb_cpu_prepare,
9772 				      NULL);
9773 	if (ret < 0)
9774 		goto out_free_cpumask;
9775 	/* Used for event triggers */
9776 	ret = -ENOMEM;
9777 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9778 	if (!temp_buffer)
9779 		goto out_rm_hp_state;
9780 
9781 	if (trace_create_savedcmd() < 0)
9782 		goto out_free_temp_buffer;
9783 
9784 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
9785 		goto out_free_savedcmd;
9786 
9787 	/* TODO: make the number of buffers hot pluggable with CPUs */
9788 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9789 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9790 		goto out_free_pipe_cpumask;
9791 	}
9792 	if (global_trace.buffer_disabled)
9793 		tracing_off();
9794 
9795 	if (trace_boot_clock) {
9796 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9797 		if (ret < 0)
9798 			pr_warn("Trace clock %s not defined, going back to default\n",
9799 				trace_boot_clock);
9800 	}
9801 
9802 	/*
9803 	 * register_tracer() might reference current_trace, so it
9804 	 * needs to be set before we register anything. This is
9805 	 * just a bootstrap of current_trace anyway.
9806 	 */
9807 	global_trace.current_trace = &nop_trace;
9808 
9809 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9810 
9811 	ftrace_init_global_array_ops(&global_trace);
9812 
9813 	init_trace_flags_index(&global_trace);
9814 
9815 	register_tracer(&nop_trace);
9816 
9817 	/* Function tracing may start here (via kernel command line) */
9818 	init_function_trace();
9819 
9820 	/* All seems OK, enable tracing */
9821 	tracing_disabled = 0;
9822 
9823 	atomic_notifier_chain_register(&panic_notifier_list,
9824 				       &trace_panic_notifier);
9825 
9826 	register_die_notifier(&trace_die_notifier);
9827 
9828 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9829 
9830 	INIT_LIST_HEAD(&global_trace.systems);
9831 	INIT_LIST_HEAD(&global_trace.events);
9832 	INIT_LIST_HEAD(&global_trace.hist_vars);
9833 	INIT_LIST_HEAD(&global_trace.err_log);
9834 	list_add(&global_trace.list, &ftrace_trace_arrays);
9835 
9836 	apply_trace_boot_options();
9837 
9838 	register_snapshot_cmd();
9839 
9840 	return 0;
9841 
9842 out_free_pipe_cpumask:
9843 	free_cpumask_var(global_trace.pipe_cpumask);
9844 out_free_savedcmd:
9845 	free_saved_cmdlines_buffer(savedcmd);
9846 out_free_temp_buffer:
9847 	ring_buffer_free(temp_buffer);
9848 out_rm_hp_state:
9849 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9850 out_free_cpumask:
9851 	free_cpumask_var(global_trace.tracing_cpumask);
9852 out_free_buffer_mask:
9853 	free_cpumask_var(tracing_buffer_mask);
9854 out:
9855 	return ret;
9856 }
9857 
9858 void __init early_trace_init(void)
9859 {
9860 	if (tracepoint_printk) {
9861 		tracepoint_print_iter =
9862 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9863 		if (MEM_FAIL(!tracepoint_print_iter,
9864 			     "Failed to allocate trace iterator\n"))
9865 			tracepoint_printk = 0;
9866 		else
9867 			static_key_enable(&tracepoint_printk_key.key);
9868 	}
9869 	tracer_alloc_buffers();
9870 
9871 	init_events();
9872 }
9873 
9874 void __init trace_init(void)
9875 {
9876 	trace_event_init();
9877 }
9878 
9879 __init static int clear_boot_tracer(void)
9880 {
9881 	/*
9882 	 * The default bootup tracer points to a buffer in an init section.
9883 	 * This function is called in lateinit. If we did not
9884 	 * find the boot tracer, then clear it out, to prevent
9885 	 * later registration from accessing the buffer that is
9886 	 * about to be freed.
9887 	 */
9888 	if (!default_bootup_tracer)
9889 		return 0;
9890 
9891 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9892 	       default_bootup_tracer);
9893 	default_bootup_tracer = NULL;
9894 
9895 	return 0;
9896 }
9897 
9898 fs_initcall(tracer_init_tracefs);
9899 late_initcall_sync(clear_boot_tracer);
9900 
9901 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9902 __init static int tracing_set_default_clock(void)
9903 {
9904 	/* sched_clock_stable() is determined in late_initcall */
9905 	if (!trace_boot_clock && !sched_clock_stable()) {
9906 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
9907 			pr_warn("Can not set tracing clock due to lockdown\n");
9908 			return -EPERM;
9909 		}
9910 
9911 		printk(KERN_WARNING
9912 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9913 		       "If you want to keep using the local clock, then add:\n"
9914 		       "  \"trace_clock=local\"\n"
9915 		       "on the kernel command line\n");
9916 		tracing_set_clock(&global_trace, "global");
9917 	}
9918 
9919 	return 0;
9920 }
9921 late_initcall_sync(tracing_set_default_clock);
9922 #endif
9923