1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include "trace.h"
53 #include "trace_output.h"
54 
55 /*
56  * On boot up, the ring buffer is set to the minimum size, so that
57  * we do not waste memory on systems that are not using tracing.
58  */
59 bool ring_buffer_expanded;
60 
61 /*
62  * We need to change this state when a selftest is running.
63  * A selftest will lurk into the ring-buffer to count the
64  * entries inserted during the selftest although some concurrent
65  * insertions into the ring-buffer such as trace_printk could occur
66  * at the same time, giving false positive or negative results.
67  */
68 static bool __read_mostly tracing_selftest_running;
69 
70 /*
71  * If boot-time tracing including tracers/events via kernel cmdline
72  * is running, we do not want to run SELFTEST.
73  */
74 bool __read_mostly tracing_selftest_disabled;
75 
76 #ifdef CONFIG_FTRACE_STARTUP_TEST
77 void __init disable_tracing_selftest(const char *reason)
78 {
79 	if (!tracing_selftest_disabled) {
80 		tracing_selftest_disabled = true;
81 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 	}
83 }
84 #endif
85 
86 /* Pipe tracepoints to printk */
87 struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90 
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93 	{ }
94 };
95 
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99 	return 0;
100 }
101 
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108 
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116 
117 cpumask_var_t __read_mostly	tracing_buffer_mask;
118 
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops.
131  * Set it to 1 to dump the buffers of all CPUs.
132  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
133  */
134 
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136 
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139 
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143 	struct module			*mod;
144 	unsigned long			length;
145 };
146 
147 union trace_eval_map_item;
148 
149 struct trace_eval_map_tail {
150 	/*
151 	 * "end" is first and points to NULL as it must be different
152 	 * than "mod" or "eval_string"
153 	 */
154 	union trace_eval_map_item	*next;
155 	const char			*end;	/* points to NULL */
156 };
157 
158 static DEFINE_MUTEX(trace_eval_mutex);
159 
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168 	struct trace_eval_map		map;
169 	struct trace_eval_map_head	head;
170 	struct trace_eval_map_tail	tail;
171 };
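
/*
 * For example (illustrative layout only, not a structure defined here),
 * an array saving N eval maps for one module looks like:
 *
 *	[0]      head: .mod = owning module, .length = N
 *	[1..N]   map:  the N trace_eval_map entries themselves
 *	[N+1]    tail: .next = pointer to the next saved array (or NULL)
 */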
172 
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175 
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178 				   struct trace_buffer *buffer,
179 				   unsigned long flags, int pc);
180 
181 #define MAX_TRACER_SIZE		100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184 
185 static bool allocate_snapshot;
186 
187 static int __init set_cmdline_ftrace(char *str)
188 {
189 	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190 	default_bootup_tracer = bootup_tracer_buf;
191 	/* We are using ftrace early, expand it */
192 	ring_buffer_expanded = true;
193 	return 1;
194 }
195 __setup("ftrace=", set_cmdline_ftrace);
196 
197 static int __init set_ftrace_dump_on_oops(char *str)
198 {
199 	if (*str++ != '=' || !*str) {
200 		ftrace_dump_on_oops = DUMP_ALL;
201 		return 1;
202 	}
203 
204 	if (!strcmp("orig_cpu", str)) {
205 		ftrace_dump_on_oops = DUMP_ORIG;
206 		return 1;
207 	}
208 
209 	return 0;
210 }
211 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
212 
213 static int __init stop_trace_on_warning(char *str)
214 {
215 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216 		__disable_trace_on_warning = 1;
217 	return 1;
218 }
219 __setup("traceoff_on_warning", stop_trace_on_warning);
220 
221 static int __init boot_alloc_snapshot(char *str)
222 {
223 	allocate_snapshot = true;
224 	/* We also need the main ring buffer expanded */
225 	ring_buffer_expanded = true;
226 	return 1;
227 }
228 __setup("alloc_snapshot", boot_alloc_snapshot);
229 
230 
231 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
232 
233 static int __init set_trace_boot_options(char *str)
234 {
235 	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
236 	return 0;
237 }
238 __setup("trace_options=", set_trace_boot_options);
239 
240 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241 static char *trace_boot_clock __initdata;
242 
243 static int __init set_trace_boot_clock(char *str)
244 {
245 	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246 	trace_boot_clock = trace_boot_clock_buf;
247 	return 0;
248 }
249 __setup("trace_clock=", set_trace_boot_clock);
250 
251 static int __init set_tracepoint_printk(char *str)
252 {
253 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
254 		tracepoint_printk = 1;
255 	return 1;
256 }
257 __setup("tp_printk", set_tracepoint_printk);
258 
259 unsigned long long ns2usecs(u64 nsec)
260 {
261 	nsec += 500;
262 	do_div(nsec, 1000);
263 	return nsec;
264 }
265 
266 static void
267 trace_process_export(struct trace_export *export,
268 	       struct ring_buffer_event *event, int flag)
269 {
270 	struct trace_entry *entry;
271 	unsigned int size = 0;
272 
273 	if (export->flags & flag) {
274 		entry = ring_buffer_event_data(event);
275 		size = ring_buffer_event_length(event);
276 		export->write(export, entry, size);
277 	}
278 }
279 
280 static DEFINE_MUTEX(ftrace_export_lock);
281 
282 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
283 
284 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
285 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
286 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
287 
288 static inline void ftrace_exports_enable(struct trace_export *export)
289 {
290 	if (export->flags & TRACE_EXPORT_FUNCTION)
291 		static_branch_inc(&trace_function_exports_enabled);
292 
293 	if (export->flags & TRACE_EXPORT_EVENT)
294 		static_branch_inc(&trace_event_exports_enabled);
295 
296 	if (export->flags & TRACE_EXPORT_MARKER)
297 		static_branch_inc(&trace_marker_exports_enabled);
298 }
299 
300 static inline void ftrace_exports_disable(struct trace_export *export)
301 {
302 	if (export->flags & TRACE_EXPORT_FUNCTION)
303 		static_branch_dec(&trace_function_exports_enabled);
304 
305 	if (export->flags & TRACE_EXPORT_EVENT)
306 		static_branch_dec(&trace_event_exports_enabled);
307 
308 	if (export->flags & TRACE_EXPORT_MARKER)
309 		static_branch_dec(&trace_marker_exports_enabled);
310 }
311 
312 static void ftrace_exports(struct ring_buffer_event *event, int flag)
313 {
314 	struct trace_export *export;
315 
316 	preempt_disable_notrace();
317 
318 	export = rcu_dereference_raw_check(ftrace_exports_list);
319 	while (export) {
320 		trace_process_export(export, event, flag);
321 		export = rcu_dereference_raw_check(export->next);
322 	}
323 
324 	preempt_enable_notrace();
325 }
326 
327 static inline void
328 add_trace_export(struct trace_export **list, struct trace_export *export)
329 {
330 	rcu_assign_pointer(export->next, *list);
331 	/*
332 	 * We are entering export into the list but another
333 	 * CPU might be walking that list. We need to make sure
334 	 * the export->next pointer is valid before another CPU sees
335 	 * the export pointer included into the list.
336 	 */
337 	rcu_assign_pointer(*list, export);
338 }
339 
340 static inline int
341 rm_trace_export(struct trace_export **list, struct trace_export *export)
342 {
343 	struct trace_export **p;
344 
345 	for (p = list; *p != NULL; p = &(*p)->next)
346 		if (*p == export)
347 			break;
348 
349 	if (*p != export)
350 		return -1;
351 
352 	rcu_assign_pointer(*p, (*p)->next);
353 
354 	return 0;
355 }
356 
357 static inline void
358 add_ftrace_export(struct trace_export **list, struct trace_export *export)
359 {
360 	ftrace_exports_enable(export);
361 
362 	add_trace_export(list, export);
363 }
364 
365 static inline int
366 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
367 {
368 	int ret;
369 
370 	ret = rm_trace_export(list, export);
371 	ftrace_exports_disable(export);
372 
373 	return ret;
374 }
375 
376 int register_ftrace_export(struct trace_export *export)
377 {
378 	if (WARN_ON_ONCE(!export->write))
379 		return -1;
380 
381 	mutex_lock(&ftrace_export_lock);
382 
383 	add_ftrace_export(&ftrace_exports_list, export);
384 
385 	mutex_unlock(&ftrace_export_lock);
386 
387 	return 0;
388 }
389 EXPORT_SYMBOL_GPL(register_ftrace_export);
390 
391 int unregister_ftrace_export(struct trace_export *export)
392 {
393 	int ret;
394 
395 	mutex_lock(&ftrace_export_lock);
396 
397 	ret = rm_ftrace_export(&ftrace_exports_list, export);
398 
399 	mutex_unlock(&ftrace_export_lock);
400 
401 	return ret;
402 }
403 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
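
/*
 * Illustrative sketch of a trace_export user. The callback signature and
 * the TRACE_EXPORT_* flags come from <linux/trace.h>; my_export, my_write()
 * and my_transport_send() are hypothetical names standing in for a real
 * transport (the STM ftrace export is an in-tree example):
 *
 *	static void my_write(struct trace_export *export,
 *			     const void *entry, unsigned int size)
 *	{
 *		my_transport_send(entry, size);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_write,
 *		.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */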
404 
405 /* trace_flags holds trace_options default values */
406 #define TRACE_DEFAULT_FLAGS						\
407 	(FUNCTION_DEFAULT_FLAGS |					\
408 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
409 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
410 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
411 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
412 
413 /* trace_options that are only supported by global_trace */
414 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
415 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
416 
417 /* trace_flags that are default zero for instances */
418 #define ZEROED_TRACE_FLAGS \
419 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
420 
421 /*
422  * The global_trace is the descriptor that holds the top-level tracing
423  * buffers for the live tracing.
424  */
425 static struct trace_array global_trace = {
426 	.trace_flags = TRACE_DEFAULT_FLAGS,
427 };
428 
429 LIST_HEAD(ftrace_trace_arrays);
430 
431 int trace_array_get(struct trace_array *this_tr)
432 {
433 	struct trace_array *tr;
434 	int ret = -ENODEV;
435 
436 	mutex_lock(&trace_types_lock);
437 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
438 		if (tr == this_tr) {
439 			tr->ref++;
440 			ret = 0;
441 			break;
442 		}
443 	}
444 	mutex_unlock(&trace_types_lock);
445 
446 	return ret;
447 }
448 
449 static void __trace_array_put(struct trace_array *this_tr)
450 {
451 	WARN_ON(!this_tr->ref);
452 	this_tr->ref--;
453 }
454 
455 /**
456  * trace_array_put - Decrement the reference counter for this trace array.
457  *
458  * NOTE: Use this when we no longer need the trace array returned by
459  * trace_array_get_by_name(). This ensures the trace array can be later
460  * destroyed.
461  *
462  */
463 void trace_array_put(struct trace_array *this_tr)
464 {
465 	if (!this_tr)
466 		return;
467 
468 	mutex_lock(&trace_types_lock);
469 	__trace_array_put(this_tr);
470 	mutex_unlock(&trace_types_lock);
471 }
472 EXPORT_SYMBOL_GPL(trace_array_put);
473 
474 int tracing_check_open_get_tr(struct trace_array *tr)
475 {
476 	int ret;
477 
478 	ret = security_locked_down(LOCKDOWN_TRACEFS);
479 	if (ret)
480 		return ret;
481 
482 	if (tracing_disabled)
483 		return -ENODEV;
484 
485 	if (tr && trace_array_get(tr) < 0)
486 		return -ENODEV;
487 
488 	return 0;
489 }
490 
491 int call_filter_check_discard(struct trace_event_call *call, void *rec,
492 			      struct trace_buffer *buffer,
493 			      struct ring_buffer_event *event)
494 {
495 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
496 	    !filter_match_preds(call->filter, rec)) {
497 		__trace_event_discard_commit(buffer, event);
498 		return 1;
499 	}
500 
501 	return 0;
502 }
503 
504 void trace_free_pid_list(struct trace_pid_list *pid_list)
505 {
506 	vfree(pid_list->pids);
507 	kfree(pid_list);
508 }
509 
510 /**
511  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
512  * @filtered_pids: The list of pids to check
513  * @search_pid: The PID to find in @filtered_pids
514  *
515  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
516  */
517 bool
518 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
519 {
520 	/*
521 	 * If pid_max changed after filtered_pids was created, we
522 	 * by default ignore all pids greater than the previous pid_max.
523 	 */
524 	if (search_pid >= filtered_pids->pid_max)
525 		return false;
526 
527 	return test_bit(search_pid, filtered_pids->pids);
528 }
529 
530 /**
531  * trace_ignore_this_task - should a task be ignored for tracing
532  * @filtered_pids: The list of pids to check
533  * @task: The task that should be ignored if not filtered
534  *
535  * Checks if @task should be traced or not from @filtered_pids.
536  * Returns true if @task should *NOT* be traced.
537  * Returns false if @task should be traced.
538  */
539 bool
540 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
541 		       struct trace_pid_list *filtered_no_pids,
542 		       struct task_struct *task)
543 {
544 	/*
545 	 * If filtered_no_pids is not empty, and the task's pid is listed
546 	 * in filtered_no_pids, then return true.
547 	 * Otherwise, if filtered_pids is empty, that means we can
548 	 * trace all tasks. If it has content, then only trace pids
549 	 * within filtered_pids.
550 	 */
551 
552 	return (filtered_pids &&
553 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
554 		(filtered_no_pids &&
555 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
556 }
557 
558 /**
559  * trace_filter_add_remove_task - Add or remove a task from a pid_list
560  * @pid_list: The list to modify
561  * @self: The current task for fork or NULL for exit
562  * @task: The task to add or remove
563  *
564  * If adding a task, if @self is defined, the task is only added if @self
565  * is also included in @pid_list. This happens on fork and tasks should
566  * only be added when the parent is listed. If @self is NULL, then the
567  * @task pid will be removed from the list, which would happen on exit
568  * of a task.
569  */
570 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
571 				  struct task_struct *self,
572 				  struct task_struct *task)
573 {
574 	if (!pid_list)
575 		return;
576 
577 	/* For forks, we only add if the forking task is listed */
578 	if (self) {
579 		if (!trace_find_filtered_pid(pid_list, self->pid))
580 			return;
581 	}
582 
583 	/* Sorry, but we don't support pid_max changing after setting */
584 	if (task->pid >= pid_list->pid_max)
585 		return;
586 
587 	/* "self" is set for forks, and NULL for exits */
588 	if (self)
589 		set_bit(task->pid, pid_list->pids);
590 	else
591 		clear_bit(task->pid, pid_list->pids);
592 }
593 
594 /**
595  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
596  * @pid_list: The pid list to show
597  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
598  * @pos: The position of the file
599  *
600  * This is used by the seq_file "next" operation to iterate the pids
601  * listed in a trace_pid_list structure.
602  *
603  * Returns the pid+1 as we want to display pid of zero, but NULL would
604  * stop the iteration.
605  */
606 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
607 {
608 	unsigned long pid = (unsigned long)v;
609 
610 	(*pos)++;
611 
612 	/* pid already is +1 of the actual previous bit */
613 	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
614 
615 	/* Return pid + 1 to allow zero to be represented */
616 	if (pid < pid_list->pid_max)
617 		return (void *)(pid + 1);
618 
619 	return NULL;
620 }
621 
622 /**
623  * trace_pid_start - Used for seq_file to start reading pid lists
624  * @pid_list: The pid list to show
625  * @pos: The position of the file
626  *
627  * This is used by seq_file "start" operation to start the iteration
628  * of listing pids.
629  *
630  * Returns the pid+1 as we want to display pid of zero, but NULL would
631  * stop the iteration.
632  */
633 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
634 {
635 	unsigned long pid;
636 	loff_t l = 0;
637 
638 	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
639 	if (pid >= pid_list->pid_max)
640 		return NULL;
641 
642 	/* Return pid + 1 so that zero can be the exit value */
643 	for (pid++; pid && l < *pos;
644 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
645 		;
646 	return (void *)pid;
647 }
648 
649 /**
650  * trace_pid_show - show the current pid in seq_file processing
651  * @m: The seq_file structure to write into
652  * @v: A void pointer of the pid (+1) value to display
653  *
654  * Can be directly used by seq_file operations to display the current
655  * pid value.
656  */
657 int trace_pid_show(struct seq_file *m, void *v)
658 {
659 	unsigned long pid = (unsigned long)v - 1;
660 
661 	seq_printf(m, "%lu\n", pid);
662 	return 0;
663 }
664 
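/*
 * Illustrative sketch of how the three helpers above are typically wired
 * into a tracefs pid file's seq_file operations (p_start, p_next, p_stop
 * and my_pid_list are hypothetical names; the real users live in the
 * event and function tracer code):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */
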
665 /* 128 should be much more than enough */
666 #define PID_BUF_SIZE		127
667 
668 int trace_pid_write(struct trace_pid_list *filtered_pids,
669 		    struct trace_pid_list **new_pid_list,
670 		    const char __user *ubuf, size_t cnt)
671 {
672 	struct trace_pid_list *pid_list;
673 	struct trace_parser parser;
674 	unsigned long val;
675 	int nr_pids = 0;
676 	ssize_t read = 0;
677 	ssize_t ret = 0;
678 	loff_t pos;
679 	pid_t pid;
680 
681 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
682 		return -ENOMEM;
683 
684 	/*
685 	 * Always recreate a new array. The write is an all or nothing
686 	 * operation. Always create a new array when adding new pids by
687 	 * the user. If the operation fails, then the current list is
688 	 * not modified.
689 	 */
690 	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
691 	if (!pid_list) {
692 		trace_parser_put(&parser);
693 		return -ENOMEM;
694 	}
695 
696 	pid_list->pid_max = READ_ONCE(pid_max);
697 
698 	/* Only truncating will shrink pid_max */
699 	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
700 		pid_list->pid_max = filtered_pids->pid_max;
701 
702 	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
703 	if (!pid_list->pids) {
704 		trace_parser_put(&parser);
705 		kfree(pid_list);
706 		return -ENOMEM;
707 	}
708 
709 	if (filtered_pids) {
710 		/* copy the current bits to the new max */
711 		for_each_set_bit(pid, filtered_pids->pids,
712 				 filtered_pids->pid_max) {
713 			set_bit(pid, pid_list->pids);
714 			nr_pids++;
715 		}
716 	}
717 
718 	while (cnt > 0) {
719 
720 		pos = 0;
721 
722 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
723 		if (ret < 0 || !trace_parser_loaded(&parser))
724 			break;
725 
726 		read += ret;
727 		ubuf += ret;
728 		cnt -= ret;
729 
730 		ret = -EINVAL;
731 		if (kstrtoul(parser.buffer, 0, &val))
732 			break;
733 		if (val >= pid_list->pid_max)
734 			break;
735 
736 		pid = (pid_t)val;
737 
738 		set_bit(pid, pid_list->pids);
739 		nr_pids++;
740 
741 		trace_parser_clear(&parser);
742 		ret = 0;
743 	}
744 	trace_parser_put(&parser);
745 
746 	if (ret < 0) {
747 		trace_free_pid_list(pid_list);
748 		return ret;
749 	}
750 
751 	if (!nr_pids) {
752 		/* Cleared the list of pids */
753 		trace_free_pid_list(pid_list);
754 		read = ret;
755 		pid_list = NULL;
756 	}
757 
758 	*new_pid_list = pid_list;
759 
760 	return read;
761 }
762 
763 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
764 {
765 	u64 ts;
766 
767 	/* Early boot up does not have a buffer yet */
768 	if (!buf->buffer)
769 		return trace_clock_local();
770 
771 	ts = ring_buffer_time_stamp(buf->buffer, cpu);
772 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
773 
774 	return ts;
775 }
776 
777 u64 ftrace_now(int cpu)
778 {
779 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
780 }
781 
782 /**
783  * tracing_is_enabled - Show if global_trace has been disabled
784  *
785  * Shows if the global trace has been enabled or not. It uses the
786  * mirror flag "buffer_disabled" to be used in fast paths such as for
787  * the irqsoff tracer. But it may be inaccurate due to races. If you
788  * need to know the accurate state, use tracing_is_on() which is a little
789  * slower, but accurate.
790  */
791 int tracing_is_enabled(void)
792 {
793 	/*
794 	 * For quick access (irqsoff uses this in fast path), just
795 	 * return the mirror variable of the state of the ring buffer.
796 	 * It's a little racy, but we don't really care.
797 	 */
798 	smp_rmb();
799 	return !global_trace.buffer_disabled;
800 }
801 
802 /*
803  * trace_buf_size is the size in bytes that is allocated
804  * for a buffer. Note, the number of bytes is always rounded
805  * to page size.
806  *
807  * This number is purposely set to a low number of 16384.
808  * If the dump on oops happens, it is much appreciated not to have
809  * to wait for all that output. Anyway, this is configurable at both
810  * boot time and run time.
811  */
812 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
813 
814 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
815 
816 /* trace_types holds a link list of available tracers. */
817 static struct tracer		*trace_types __read_mostly;
818 
819 /*
820  * trace_types_lock is used to protect the trace_types list.
821  */
822 DEFINE_MUTEX(trace_types_lock);
823 
824 /*
825  * Serialize access to the ring buffer
826  *
827  * The ring buffer serializes readers, but that is only low-level protection.
828  * The validity of the events (returned by ring_buffer_peek(), etc.)
829  * is not protected by the ring buffer.
830  *
831  * The content of events may become garbage if we allow another process to
832  * consume these events concurrently:
833  *   A) the page of the consumed events may become a normal page
834  *      (not a reader page) in the ring buffer, and this page will be
835  *      rewritten by the events producer.
836  *   B) The page of the consumed events may become a page for splice_read,
837  *      and this page will be returned to the system.
838  *
839  * These primitives allow multiple processes to access different per-cpu
840  * ring buffers concurrently.
841  *
842  * These primitives don't distinguish read-only and read-consume access.
843  * Multiple read-only accesses are also serialized.
844  */
845 
846 #ifdef CONFIG_SMP
847 static DECLARE_RWSEM(all_cpu_access_lock);
848 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
849 
850 static inline void trace_access_lock(int cpu)
851 {
852 	if (cpu == RING_BUFFER_ALL_CPUS) {
853 		/* gain it for accessing the whole ring buffer. */
854 		down_write(&all_cpu_access_lock);
855 	} else {
856 		/* gain it for accessing a cpu ring buffer. */
857 
858 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
859 		down_read(&all_cpu_access_lock);
860 
861 		/* Secondly block other access to this @cpu ring buffer. */
862 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
863 	}
864 }
865 
866 static inline void trace_access_unlock(int cpu)
867 {
868 	if (cpu == RING_BUFFER_ALL_CPUS) {
869 		up_write(&all_cpu_access_lock);
870 	} else {
871 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
872 		up_read(&all_cpu_access_lock);
873 	}
874 }
875 
876 static inline void trace_access_lock_init(void)
877 {
878 	int cpu;
879 
880 	for_each_possible_cpu(cpu)
881 		mutex_init(&per_cpu(cpu_access_lock, cpu));
882 }
883 
884 #else
885 
886 static DEFINE_MUTEX(access_lock);
887 
888 static inline void trace_access_lock(int cpu)
889 {
890 	(void)cpu;
891 	mutex_lock(&access_lock);
892 }
893 
894 static inline void trace_access_unlock(int cpu)
895 {
896 	(void)cpu;
897 	mutex_unlock(&access_lock);
898 }
899 
900 static inline void trace_access_lock_init(void)
901 {
902 }
903 
904 #endif
905 
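/*
 * Usage pattern for the locking helpers above (sketch): readers bracket
 * their consumption of a cpu's events with the lock for that cpu, and
 * pass RING_BUFFER_ALL_CPUS when they need exclusive access to every
 * cpu's buffer at once:
 *
 *	trace_access_lock(cpu);
 *	... consume or peek at events of @cpu's ring buffer ...
 *	trace_access_unlock(cpu);
 */
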
906 #ifdef CONFIG_STACKTRACE
907 static void __ftrace_trace_stack(struct trace_buffer *buffer,
908 				 unsigned long flags,
909 				 int skip, int pc, struct pt_regs *regs);
910 static inline void ftrace_trace_stack(struct trace_array *tr,
911 				      struct trace_buffer *buffer,
912 				      unsigned long flags,
913 				      int skip, int pc, struct pt_regs *regs);
914 
915 #else
916 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
917 					unsigned long flags,
918 					int skip, int pc, struct pt_regs *regs)
919 {
920 }
921 static inline void ftrace_trace_stack(struct trace_array *tr,
922 				      struct trace_buffer *buffer,
923 				      unsigned long flags,
924 				      int skip, int pc, struct pt_regs *regs)
925 {
926 }
927 
928 #endif
929 
930 static __always_inline void
931 trace_event_setup(struct ring_buffer_event *event,
932 		  int type, unsigned long flags, int pc)
933 {
934 	struct trace_entry *ent = ring_buffer_event_data(event);
935 
936 	tracing_generic_entry_update(ent, type, flags, pc);
937 }
938 
939 static __always_inline struct ring_buffer_event *
940 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
941 			  int type,
942 			  unsigned long len,
943 			  unsigned long flags, int pc)
944 {
945 	struct ring_buffer_event *event;
946 
947 	event = ring_buffer_lock_reserve(buffer, len);
948 	if (event != NULL)
949 		trace_event_setup(event, type, flags, pc);
950 
951 	return event;
952 }
953 
954 void tracer_tracing_on(struct trace_array *tr)
955 {
956 	if (tr->array_buffer.buffer)
957 		ring_buffer_record_on(tr->array_buffer.buffer);
958 	/*
959 	 * This flag is looked at when buffers haven't been allocated
960 	 * yet, or by some tracers (like irqsoff), that just want to
961 	 * know if the ring buffer has been disabled, but it can handle
962 	 * races of where it gets disabled but we still do a record.
963 	 * As the check is in the fast path of the tracers, it is more
964 	 * important to be fast than accurate.
965 	 */
966 	tr->buffer_disabled = 0;
967 	/* Make the flag seen by readers */
968 	smp_wmb();
969 }
970 
971 /**
972  * tracing_on - enable tracing buffers
973  *
974  * This function enables tracing buffers that may have been
975  * disabled with tracing_off.
976  */
977 void tracing_on(void)
978 {
979 	tracer_tracing_on(&global_trace);
980 }
981 EXPORT_SYMBOL_GPL(tracing_on);
982 
983 
984 static __always_inline void
985 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
986 {
987 	__this_cpu_write(trace_taskinfo_save, true);
988 
989 	/* If this is the temp buffer, we need to commit fully */
990 	if (this_cpu_read(trace_buffered_event) == event) {
991 		/* Length is in event->array[0] */
992 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
993 		/* Release the temp buffer */
994 		this_cpu_dec(trace_buffered_event_cnt);
995 	} else
996 		ring_buffer_unlock_commit(buffer, event);
997 }
998 
999 /**
1000  * __trace_puts - write a constant string into the trace buffer.
1001  * @ip:	   The address of the caller
1002  * @str:   The constant string to write
1003  * @size:  The size of the string.
1004  */
1005 int __trace_puts(unsigned long ip, const char *str, int size)
1006 {
1007 	struct ring_buffer_event *event;
1008 	struct trace_buffer *buffer;
1009 	struct print_entry *entry;
1010 	unsigned long irq_flags;
1011 	int alloc;
1012 	int pc;
1013 
1014 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1015 		return 0;
1016 
1017 	pc = preempt_count();
1018 
1019 	if (unlikely(tracing_selftest_running || tracing_disabled))
1020 		return 0;
1021 
1022 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1023 
1024 	local_save_flags(irq_flags);
1025 	buffer = global_trace.array_buffer.buffer;
1026 	ring_buffer_nest_start(buffer);
1027 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1028 					    irq_flags, pc);
1029 	if (!event) {
1030 		size = 0;
1031 		goto out;
1032 	}
1033 
1034 	entry = ring_buffer_event_data(event);
1035 	entry->ip = ip;
1036 
1037 	memcpy(&entry->buf, str, size);
1038 
1039 	/* Add a newline if necessary */
1040 	if (entry->buf[size - 1] != '\n') {
1041 		entry->buf[size] = '\n';
1042 		entry->buf[size + 1] = '\0';
1043 	} else
1044 		entry->buf[size] = '\0';
1045 
1046 	__buffer_unlock_commit(buffer, event);
1047 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1048  out:
1049 	ring_buffer_nest_end(buffer);
1050 	return size;
1051 }
1052 EXPORT_SYMBOL_GPL(__trace_puts);
1053 
1054 /**
1055  * __trace_bputs - write the pointer to a constant string into trace buffer
1056  * @ip:	   The address of the caller
1057  * @str:   The constant string to write to the buffer to
1058  */
1059 int __trace_bputs(unsigned long ip, const char *str)
1060 {
1061 	struct ring_buffer_event *event;
1062 	struct trace_buffer *buffer;
1063 	struct bputs_entry *entry;
1064 	unsigned long irq_flags;
1065 	int size = sizeof(struct bputs_entry);
1066 	int ret = 0;
1067 	int pc;
1068 
1069 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1070 		return 0;
1071 
1072 	pc = preempt_count();
1073 
1074 	if (unlikely(tracing_selftest_running || tracing_disabled))
1075 		return 0;
1076 
1077 	local_save_flags(irq_flags);
1078 	buffer = global_trace.array_buffer.buffer;
1079 
1080 	ring_buffer_nest_start(buffer);
1081 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1082 					    irq_flags, pc);
1083 	if (!event)
1084 		goto out;
1085 
1086 	entry = ring_buffer_event_data(event);
1087 	entry->ip			= ip;
1088 	entry->str			= str;
1089 
1090 	__buffer_unlock_commit(buffer, event);
1091 	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1092 
1093 	ret = 1;
1094  out:
1095 	ring_buffer_nest_end(buffer);
1096 	return ret;
1097 }
1098 EXPORT_SYMBOL_GPL(__trace_bputs);
1099 
1100 #ifdef CONFIG_TRACER_SNAPSHOT
1101 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1102 					   void *cond_data)
1103 {
1104 	struct tracer *tracer = tr->current_trace;
1105 	unsigned long flags;
1106 
1107 	if (in_nmi()) {
1108 		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1109 		internal_trace_puts("*** snapshot is being ignored        ***\n");
1110 		return;
1111 	}
1112 
1113 	if (!tr->allocated_snapshot) {
1114 		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1115 		internal_trace_puts("*** stopping trace here!   ***\n");
1116 		tracing_off();
1117 		return;
1118 	}
1119 
1120 	/* Note, snapshot can not be used when the tracer uses it */
1121 	if (tracer->use_max_tr) {
1122 		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1123 		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1124 		return;
1125 	}
1126 
1127 	local_irq_save(flags);
1128 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1129 	local_irq_restore(flags);
1130 }
1131 
1132 void tracing_snapshot_instance(struct trace_array *tr)
1133 {
1134 	tracing_snapshot_instance_cond(tr, NULL);
1135 }
1136 
1137 /**
1138  * tracing_snapshot - take a snapshot of the current buffer.
1139  *
1140  * This causes a swap between the snapshot buffer and the current live
1141  * tracing buffer. You can use this to take snapshots of the live
1142  * trace when some condition is triggered, but continue to trace.
1143  *
1144  * Note, make sure to allocate the snapshot with either
1145  * a tracing_snapshot_alloc(), or by doing it manually
1146  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1147  *
1148  * If the snapshot buffer is not allocated, it will stop tracing.
1149  * Basically making a permanent snapshot.
1150  */
1151 void tracing_snapshot(void)
1152 {
1153 	struct trace_array *tr = &global_trace;
1154 
1155 	tracing_snapshot_instance(tr);
1156 }
1157 EXPORT_SYMBOL_GPL(tracing_snapshot);
1158 
1159 /**
1160  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1161  * @tr:		The tracing instance to snapshot
1162  * @cond_data:	The data to be tested conditionally, and possibly saved
1163  *
1164  * This is the same as tracing_snapshot() except that the snapshot is
1165  * conditional - the snapshot will only happen if the
1166  * cond_snapshot.update() implementation receiving the cond_data
1167  * returns true, which means that the trace array's cond_snapshot
1168  * update() operation used the cond_data to determine whether the
1169  * snapshot should be taken, and if it was, presumably saved it along
1170  * with the snapshot.
1171  */
1172 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1173 {
1174 	tracing_snapshot_instance_cond(tr, cond_data);
1175 }
1176 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1177 
1178 /**
1179  * tracing_snapshot_cond_data - get the user data associated with a snapshot
1180  * @tr:		The tracing instance
1181  *
1182  * When the user enables a conditional snapshot using
1183  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1184  * with the snapshot.  This accessor is used to retrieve it.
1185  *
1186  * Should not be called from cond_snapshot.update(), since it takes
1187  * the tr->max_lock lock, which the code calling
1188  * cond_snapshot.update() has already done.
1189  *
1190  * Returns the cond_data associated with the trace array's snapshot.
1191  */
1192 void *tracing_cond_snapshot_data(struct trace_array *tr)
1193 {
1194 	void *cond_data = NULL;
1195 
1196 	arch_spin_lock(&tr->max_lock);
1197 
1198 	if (tr->cond_snapshot)
1199 		cond_data = tr->cond_snapshot->cond_data;
1200 
1201 	arch_spin_unlock(&tr->max_lock);
1202 
1203 	return cond_data;
1204 }
1205 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1206 
1207 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1208 					struct array_buffer *size_buf, int cpu_id);
1209 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1210 
1211 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1212 {
1213 	int ret;
1214 
1215 	if (!tr->allocated_snapshot) {
1216 
1217 		/* allocate spare buffer */
1218 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1219 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1220 		if (ret < 0)
1221 			return ret;
1222 
1223 		tr->allocated_snapshot = true;
1224 	}
1225 
1226 	return 0;
1227 }
1228 
1229 static void free_snapshot(struct trace_array *tr)
1230 {
1231 	/*
1232 	 * We don't free the ring buffer; instead, we resize it because
1233 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1234 	 * we want to preserve it.
1235 	 */
1236 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1237 	set_buffer_entries(&tr->max_buffer, 1);
1238 	tracing_reset_online_cpus(&tr->max_buffer);
1239 	tr->allocated_snapshot = false;
1240 }
1241 
1242 /**
1243  * tracing_alloc_snapshot - allocate snapshot buffer.
1244  *
1245  * This only allocates the snapshot buffer if it isn't already
1246  * allocated - it doesn't also take a snapshot.
1247  *
1248  * This is meant to be used in cases where the snapshot buffer needs
1249  * to be set up for events that can't sleep but need to be able to
1250  * trigger a snapshot.
1251  */
1252 int tracing_alloc_snapshot(void)
1253 {
1254 	struct trace_array *tr = &global_trace;
1255 	int ret;
1256 
1257 	ret = tracing_alloc_snapshot_instance(tr);
1258 	WARN_ON(ret < 0);
1259 
1260 	return ret;
1261 }
1262 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1263 
1264 /**
1265  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1266  *
1267  * This is similar to tracing_snapshot(), but it will allocate the
1268  * snapshot buffer if it isn't already allocated. Use this only
1269  * where it is safe to sleep, as the allocation may sleep.
1270  *
1271  * This causes a swap between the snapshot buffer and the current live
1272  * tracing buffer. You can use this to take snapshots of the live
1273  * trace when some condition is triggered, but continue to trace.
1274  */
1275 void tracing_snapshot_alloc(void)
1276 {
1277 	int ret;
1278 
1279 	ret = tracing_alloc_snapshot();
1280 	if (ret < 0)
1281 		return;
1282 
1283 	tracing_snapshot();
1284 }
1285 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1286 
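/*
 * Typical in-kernel usage of the snapshot API above (sketch; the name
 * condition_hit is hypothetical): allocate the spare buffer once from a
 * context that may sleep, then take the snapshot wherever the interesting
 * condition fires (tracing_snapshot() itself refuses to run in NMI context):
 *
 *	tracing_alloc_snapshot();	(may sleep; do it at init time)
 *	...
 *	if (condition_hit)
 *		tracing_snapshot();	(swap live buffer and snapshot)
 */
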
1287 /**
1288  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1289  * @tr:		The tracing instance
1290  * @cond_data:	User data to associate with the snapshot
1291  * @update:	Implementation of the cond_snapshot update function
1292  *
1293  * Check whether the conditional snapshot for the given instance has
1294  * already been enabled, or if the current tracer is already using a
1295  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1296  * save the cond_data and update function inside.
1297  *
1298  * Returns 0 if successful, error otherwise.
1299  */
1300 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1301 				 cond_update_fn_t update)
1302 {
1303 	struct cond_snapshot *cond_snapshot;
1304 	int ret = 0;
1305 
1306 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1307 	if (!cond_snapshot)
1308 		return -ENOMEM;
1309 
1310 	cond_snapshot->cond_data = cond_data;
1311 	cond_snapshot->update = update;
1312 
1313 	mutex_lock(&trace_types_lock);
1314 
1315 	ret = tracing_alloc_snapshot_instance(tr);
1316 	if (ret)
1317 		goto fail_unlock;
1318 
1319 	if (tr->current_trace->use_max_tr) {
1320 		ret = -EBUSY;
1321 		goto fail_unlock;
1322 	}
1323 
1324 	/*
1325 	 * The cond_snapshot can only change to NULL without the
1326 	 * trace_types_lock. We don't care if we race with it going
1327 	 * to NULL, but we want to make sure that it's not set to
1328 	 * something other than NULL when we get here, which we can
1329 	 * do safely with only holding the trace_types_lock and not
1330 	 * having to take the max_lock.
1331 	 */
1332 	if (tr->cond_snapshot) {
1333 		ret = -EBUSY;
1334 		goto fail_unlock;
1335 	}
1336 
1337 	arch_spin_lock(&tr->max_lock);
1338 	tr->cond_snapshot = cond_snapshot;
1339 	arch_spin_unlock(&tr->max_lock);
1340 
1341 	mutex_unlock(&trace_types_lock);
1342 
1343 	return ret;
1344 
1345  fail_unlock:
1346 	mutex_unlock(&trace_types_lock);
1347 	kfree(cond_snapshot);
1348 	return ret;
1349 }
1350 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
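
/*
 * Illustrative sketch of a conditional-snapshot user (my_ctx and
 * my_update() are hypothetical; the histogram trigger's "snapshot"
 * action is the in-tree user of this interface):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_ctx *ctx = cond_data;
 *
 *		return ctx->value > ctx->threshold;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_ctx, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_ctx);	(snapshots only if my_update()
 *						 returns true)
 */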
1351 
1352 /**
1353  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1354  * @tr:		The tracing instance
1355  *
1356  * Check whether the conditional snapshot for the given instance is
1357  * enabled; if so, free the cond_snapshot associated with it,
1358  * otherwise return -EINVAL.
1359  *
1360  * Returns 0 if successful, error otherwise.
1361  */
1362 int tracing_snapshot_cond_disable(struct trace_array *tr)
1363 {
1364 	int ret = 0;
1365 
1366 	arch_spin_lock(&tr->max_lock);
1367 
1368 	if (!tr->cond_snapshot)
1369 		ret = -EINVAL;
1370 	else {
1371 		kfree(tr->cond_snapshot);
1372 		tr->cond_snapshot = NULL;
1373 	}
1374 
1375 	arch_spin_unlock(&tr->max_lock);
1376 
1377 	return ret;
1378 }
1379 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1380 #else
1381 void tracing_snapshot(void)
1382 {
1383 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot);
1386 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1387 {
1388 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1389 }
1390 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1391 int tracing_alloc_snapshot(void)
1392 {
1393 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1394 	return -ENODEV;
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1397 void tracing_snapshot_alloc(void)
1398 {
1399 	/* Give warning */
1400 	tracing_snapshot();
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1403 void *tracing_cond_snapshot_data(struct trace_array *tr)
1404 {
1405 	return NULL;
1406 }
1407 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1408 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1409 {
1410 	return -ENODEV;
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1413 int tracing_snapshot_cond_disable(struct trace_array *tr)
1414 {
1415 	return false;
1416 }
1417 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1418 #endif /* CONFIG_TRACER_SNAPSHOT */
1419 
1420 void tracer_tracing_off(struct trace_array *tr)
1421 {
1422 	if (tr->array_buffer.buffer)
1423 		ring_buffer_record_off(tr->array_buffer.buffer);
1424 	/*
1425 	 * This flag is looked at when buffers haven't been allocated
1426 	 * yet, or by some tracers (like irqsoff), that just want to
1427 	 * know if the ring buffer has been disabled, but it can handle
1428 	 * races of where it gets disabled but we still do a record.
1429 	 * As the check is in the fast path of the tracers, it is more
1430 	 * important to be fast than accurate.
1431 	 */
1432 	tr->buffer_disabled = 1;
1433 	/* Make the flag seen by readers */
1434 	smp_wmb();
1435 }
1436 
1437 /**
1438  * tracing_off - turn off tracing buffers
1439  *
1440  * This function stops the tracing buffers from recording data.
1441  * It does not disable any overhead the tracers themselves may
1442  * be causing. This function simply causes all recording to
1443  * the ring buffers to fail.
1444  */
1445 void tracing_off(void)
1446 {
1447 	tracer_tracing_off(&global_trace);
1448 }
1449 EXPORT_SYMBOL_GPL(tracing_off);
1450 
1451 void disable_trace_on_warning(void)
1452 {
1453 	if (__disable_trace_on_warning) {
1454 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1455 			"Disabling tracing due to warning\n");
1456 		tracing_off();
1457 	}
1458 }
1459 
1460 /**
1461  * tracer_tracing_is_on - show real state of ring buffer enabled
1462  * @tr : the trace array to know if ring buffer is enabled
1463  *
1464  * Shows real state of the ring buffer if it is enabled or not.
1465  */
1466 bool tracer_tracing_is_on(struct trace_array *tr)
1467 {
1468 	if (tr->array_buffer.buffer)
1469 		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1470 	return !tr->buffer_disabled;
1471 }
1472 
1473 /**
1474  * tracing_is_on - show state of ring buffers enabled
1475  */
1476 int tracing_is_on(void)
1477 {
1478 	return tracer_tracing_is_on(&global_trace);
1479 }
1480 EXPORT_SYMBOL_GPL(tracing_is_on);
1481 
1482 static int __init set_buf_size(char *str)
1483 {
1484 	unsigned long buf_size;
1485 
1486 	if (!str)
1487 		return 0;
1488 	buf_size = memparse(str, &str);
1489 	/* nr_entries can not be zero */
1490 	if (buf_size == 0)
1491 		return 0;
1492 	trace_buf_size = buf_size;
1493 	return 1;
1494 }
1495 __setup("trace_buf_size=", set_buf_size);
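
/*
 * For example, booting with "trace_buf_size=10M" asks for roughly ten
 * megabytes per cpu (memparse() accepts the usual K/M/G suffixes, and the
 * value is rounded to page size); the same knob is available at run time
 * as buffer_size_kb in tracefs.
 */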
1496 
1497 static int __init set_tracing_thresh(char *str)
1498 {
1499 	unsigned long threshold;
1500 	int ret;
1501 
1502 	if (!str)
1503 		return 0;
1504 	ret = kstrtoul(str, 0, &threshold);
1505 	if (ret < 0)
1506 		return 0;
1507 	tracing_thresh = threshold * 1000;
1508 	return 1;
1509 }
1510 __setup("tracing_thresh=", set_tracing_thresh);
1511 
1512 unsigned long nsecs_to_usecs(unsigned long nsecs)
1513 {
1514 	return nsecs / 1000;
1515 }
1516 
1517 /*
1518  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1519  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1520  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1521  * of strings in the order that the evals (enum) were defined.
1522  */
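
/*
 * For example, an entry such as C(PRINT_PARENT, "print-parent") inside
 * TRACE_FLAGS expands here to just "print-parent", so trace_options[]
 * below becomes the list of option-name strings in enum order.
 */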
1523 #undef C
1524 #define C(a, b) b
1525 
1526 /* These must match the bit positions in trace_iterator_flags */
1527 static const char *trace_options[] = {
1528 	TRACE_FLAGS
1529 	NULL
1530 };
1531 
1532 static struct {
1533 	u64 (*func)(void);
1534 	const char *name;
1535 	int in_ns;		/* is this clock in nanoseconds? */
1536 } trace_clocks[] = {
1537 	{ trace_clock_local,		"local",	1 },
1538 	{ trace_clock_global,		"global",	1 },
1539 	{ trace_clock_counter,		"counter",	0 },
1540 	{ trace_clock_jiffies,		"uptime",	0 },
1541 	{ trace_clock,			"perf",		1 },
1542 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1543 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1544 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1545 	ARCH_TRACE_CLOCKS
1546 };
1547 
1548 bool trace_clock_in_ns(struct trace_array *tr)
1549 {
1550 	if (trace_clocks[tr->clock_id].in_ns)
1551 		return true;
1552 
1553 	return false;
1554 }
1555 
1556 /*
1557  * trace_parser_get_init - gets the buffer for trace parser
1558  */
1559 int trace_parser_get_init(struct trace_parser *parser, int size)
1560 {
1561 	memset(parser, 0, sizeof(*parser));
1562 
1563 	parser->buffer = kmalloc(size, GFP_KERNEL);
1564 	if (!parser->buffer)
1565 		return 1;
1566 
1567 	parser->size = size;
1568 	return 0;
1569 }
1570 
1571 /*
1572  * trace_parser_put - frees the buffer for trace parser
1573  */
1574 void trace_parser_put(struct trace_parser *parser)
1575 {
1576 	kfree(parser->buffer);
1577 	parser->buffer = NULL;
1578 }
1579 
1580 /*
1581  * trace_get_user - reads the user input string separated by space
1582  * (matched by isspace(ch))
1583  *
1584  * For each string found the 'struct trace_parser' is updated,
1585  * and the function returns.
1586  *
1587  * Returns number of bytes read.
1588  *
1589  * See kernel/trace/trace.h for 'struct trace_parser' details.
1590  */
1591 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1592 	size_t cnt, loff_t *ppos)
1593 {
1594 	char ch;
1595 	size_t read = 0;
1596 	ssize_t ret;
1597 
1598 	if (!*ppos)
1599 		trace_parser_clear(parser);
1600 
1601 	ret = get_user(ch, ubuf++);
1602 	if (ret)
1603 		goto out;
1604 
1605 	read++;
1606 	cnt--;
1607 
1608 	/*
1609 	 * The parser is not finished with the last write,
1610 	 * continue reading the user input without skipping spaces.
1611 	 */
1612 	if (!parser->cont) {
1613 		/* skip white space */
1614 		while (cnt && isspace(ch)) {
1615 			ret = get_user(ch, ubuf++);
1616 			if (ret)
1617 				goto out;
1618 			read++;
1619 			cnt--;
1620 		}
1621 
1622 		parser->idx = 0;
1623 
1624 		/* only spaces were written */
1625 		if (isspace(ch) || !ch) {
1626 			*ppos += read;
1627 			ret = read;
1628 			goto out;
1629 		}
1630 	}
1631 
1632 	/* read the non-space input */
1633 	while (cnt && !isspace(ch) && ch) {
1634 		if (parser->idx < parser->size - 1)
1635 			parser->buffer[parser->idx++] = ch;
1636 		else {
1637 			ret = -EINVAL;
1638 			goto out;
1639 		}
1640 		ret = get_user(ch, ubuf++);
1641 		if (ret)
1642 			goto out;
1643 		read++;
1644 		cnt--;
1645 	}
1646 
1647 	/* We either got finished input or we have to wait for another call. */
1648 	if (isspace(ch) || !ch) {
1649 		parser->buffer[parser->idx] = 0;
1650 		parser->cont = false;
1651 	} else if (parser->idx < parser->size - 1) {
1652 		parser->cont = true;
1653 		parser->buffer[parser->idx++] = ch;
1654 		/* Make sure the parsed string always terminates with '\0'. */
1655 		parser->buffer[parser->idx] = 0;
1656 	} else {
1657 		ret = -EINVAL;
1658 		goto out;
1659 	}
1660 
1661 	*ppos += read;
1662 	ret = read;
1663 
1664 out:
1665 	return ret;
1666 }
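
/*
 * For example, a write of "123 456\n" is consumed in two calls: the first
 * fills parser->buffer with "123", the second with "456". If a token is
 * split across writes, parser->cont is set so the next call keeps
 * appending to the same token instead of skipping leading spaces.
 */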
1667 
1668 /* TODO add a seq_buf_to_buffer() */
1669 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1670 {
1671 	int len;
1672 
1673 	if (trace_seq_used(s) <= s->seq.readpos)
1674 		return -EBUSY;
1675 
1676 	len = trace_seq_used(s) - s->seq.readpos;
1677 	if (cnt > len)
1678 		cnt = len;
1679 	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1680 
1681 	s->seq.readpos += cnt;
1682 	return cnt;
1683 }
1684 
1685 unsigned long __read_mostly	tracing_thresh;
1686 static const struct file_operations tracing_max_lat_fops;
1687 
1688 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1689 	defined(CONFIG_FSNOTIFY)
1690 
1691 static struct workqueue_struct *fsnotify_wq;
1692 
1693 static void latency_fsnotify_workfn(struct work_struct *work)
1694 {
1695 	struct trace_array *tr = container_of(work, struct trace_array,
1696 					      fsnotify_work);
1697 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1698 }
1699 
1700 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1701 {
1702 	struct trace_array *tr = container_of(iwork, struct trace_array,
1703 					      fsnotify_irqwork);
1704 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1705 }
1706 
1707 static void trace_create_maxlat_file(struct trace_array *tr,
1708 				     struct dentry *d_tracer)
1709 {
1710 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1711 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1712 	tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1713 					      d_tracer, &tr->max_latency,
1714 					      &tracing_max_lat_fops);
1715 }
1716 
1717 __init static int latency_fsnotify_init(void)
1718 {
1719 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1720 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1721 	if (!fsnotify_wq) {
1722 		pr_err("Unable to allocate tr_max_lat_wq\n");
1723 		return -ENOMEM;
1724 	}
1725 	return 0;
1726 }
1727 
1728 late_initcall_sync(latency_fsnotify_init);
1729 
1730 void latency_fsnotify(struct trace_array *tr)
1731 {
1732 	if (!fsnotify_wq)
1733 		return;
1734 	/*
1735 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1736 	 * possible that we are called from __schedule() or do_idle(), which
1737 	 * could cause a deadlock.
1738 	 */
1739 	irq_work_queue(&tr->fsnotify_irqwork);
1740 }
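
/*
 * The full notification path is latency_fsnotify() -> irq_work ->
 * workqueue -> fsnotify_inode(), so the final fsnotify call always runs
 * in process context even when the new max latency was detected from
 * scheduler or idle code.
 */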
1741 
1742 /*
1743  * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1744  *  defined(CONFIG_FSNOTIFY)
1745  */
1746 #else
1747 
1748 #define trace_create_maxlat_file(tr, d_tracer)				\
1749 	trace_create_file("tracing_max_latency", 0644, d_tracer,	\
1750 			  &tr->max_latency, &tracing_max_lat_fops)
1751 
1752 #endif
1753 
1754 #ifdef CONFIG_TRACER_MAX_TRACE
1755 /*
1756  * Copy the new maximum trace into the separate maximum-trace
1757  * structure. (this way the maximum trace is permanently saved,
1758  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1759  */
1760 static void
1761 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1762 {
1763 	struct array_buffer *trace_buf = &tr->array_buffer;
1764 	struct array_buffer *max_buf = &tr->max_buffer;
1765 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1766 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1767 
1768 	max_buf->cpu = cpu;
1769 	max_buf->time_start = data->preempt_timestamp;
1770 
1771 	max_data->saved_latency = tr->max_latency;
1772 	max_data->critical_start = data->critical_start;
1773 	max_data->critical_end = data->critical_end;
1774 
1775 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1776 	max_data->pid = tsk->pid;
1777 	/*
1778 	 * If tsk == current, then use current_uid(), as that does not use
1779 	 * RCU. The irq tracer can be called out of RCU scope.
1780 	 */
1781 	if (tsk == current)
1782 		max_data->uid = current_uid();
1783 	else
1784 		max_data->uid = task_uid(tsk);
1785 
1786 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1787 	max_data->policy = tsk->policy;
1788 	max_data->rt_priority = tsk->rt_priority;
1789 
1790 	/* record this task's comm */
1791 	tracing_record_cmdline(tsk);
1792 	latency_fsnotify(tr);
1793 }
1794 
1795 /**
1796  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1797  * @tr: tracer
1798  * @tsk: the task with the latency
1799  * @cpu: The cpu that initiated the trace.
1800  * @cond_data: User data associated with a conditional snapshot
1801  *
1802  * Flip the buffers between the @tr and the max_tr and record information
1803  * about which task was the cause of this latency.
1804  */
1805 void
1806 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1807 	      void *cond_data)
1808 {
1809 	if (tr->stop_count)
1810 		return;
1811 
1812 	WARN_ON_ONCE(!irqs_disabled());
1813 
1814 	if (!tr->allocated_snapshot) {
1815 		/* Only the nop tracer should hit this when disabling */
1816 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1817 		return;
1818 	}
1819 
1820 	arch_spin_lock(&tr->max_lock);
1821 
1822 	/* Inherit the recordable setting from array_buffer */
1823 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1824 		ring_buffer_record_on(tr->max_buffer.buffer);
1825 	else
1826 		ring_buffer_record_off(tr->max_buffer.buffer);
1827 
1828 #ifdef CONFIG_TRACER_SNAPSHOT
1829 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1830 		goto out_unlock;
1831 #endif
1832 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1833 
1834 	__update_max_tr(tr, tsk, cpu);
1835 
1836  out_unlock:
1837 	arch_spin_unlock(&tr->max_lock);
1838 }
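
/*
 * Illustrative call site only (simplified; the wakeup latency tracer
 * follows essentially this pattern): with interrupts already disabled,
 * a tracer that has just measured a new worst-case delay does roughly:
 *
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 */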
1839 
1840 /**
1841  * update_max_tr_single - only copy one trace over, and reset the rest
1842  * @tr: tracer
1843  * @tsk: task with the latency
1844  * @cpu: the cpu of the buffer to copy.
1845  *
1846  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1847  */
1848 void
1849 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1850 {
1851 	int ret;
1852 
1853 	if (tr->stop_count)
1854 		return;
1855 
1856 	WARN_ON_ONCE(!irqs_disabled());
1857 	if (!tr->allocated_snapshot) {
1858 		/* Only the nop tracer should hit this when disabling */
1859 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1860 		return;
1861 	}
1862 
1863 	arch_spin_lock(&tr->max_lock);
1864 
1865 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1866 
1867 	if (ret == -EBUSY) {
1868 		/*
1869 		 * We failed to swap the buffer due to a commit taking
1870 		 * place on this CPU. We fail to record, but we reset
1871 		 * the max trace buffer (no one writes directly to it)
1872 		 * and flag that it failed.
1873 		 */
1874 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1875 			"Failed to swap buffers due to commit in progress\n");
1876 	}
1877 
1878 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1879 
1880 	__update_max_tr(tr, tsk, cpu);
1881 	arch_spin_unlock(&tr->max_lock);
1882 }
1883 #endif /* CONFIG_TRACER_MAX_TRACE */
1884 
1885 static int wait_on_pipe(struct trace_iterator *iter, int full)
1886 {
1887 	/* Iterators are static, they should be filled or empty */
1888 	if (trace_buffer_iter(iter, iter->cpu_file))
1889 		return 0;
1890 
1891 	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1892 				full);
1893 }
1894 
1895 #ifdef CONFIG_FTRACE_STARTUP_TEST
1896 static bool selftests_can_run;
1897 
1898 struct trace_selftests {
1899 	struct list_head		list;
1900 	struct tracer			*type;
1901 };
1902 
1903 static LIST_HEAD(postponed_selftests);
1904 
1905 static int save_selftest(struct tracer *type)
1906 {
1907 	struct trace_selftests *selftest;
1908 
1909 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1910 	if (!selftest)
1911 		return -ENOMEM;
1912 
1913 	selftest->type = type;
1914 	list_add(&selftest->list, &postponed_selftests);
1915 	return 0;
1916 }
1917 
1918 static int run_tracer_selftest(struct tracer *type)
1919 {
1920 	struct trace_array *tr = &global_trace;
1921 	struct tracer *saved_tracer = tr->current_trace;
1922 	int ret;
1923 
1924 	if (!type->selftest || tracing_selftest_disabled)
1925 		return 0;
1926 
1927 	/*
1928 	 * If a tracer registers early in boot up (before scheduling is
1929 	 * initialized and such), then do not run its selftests yet.
1930 	 * Instead, run it a little later in the boot process.
1931 	 */
1932 	if (!selftests_can_run)
1933 		return save_selftest(type);
1934 
1935 	/*
1936 	 * Run a selftest on this tracer.
1937 	 * Here we reset the trace buffer, and set the current
1938 	 * tracer to be this tracer. The tracer can then run some
1939 	 * internal tracing to verify that everything is in order.
1940 	 * If we fail, we do not register this tracer.
1941 	 */
1942 	tracing_reset_online_cpus(&tr->array_buffer);
1943 
1944 	tr->current_trace = type;
1945 
1946 #ifdef CONFIG_TRACER_MAX_TRACE
1947 	if (type->use_max_tr) {
1948 		/* If we expanded the buffers, make sure the max is expanded too */
1949 		if (ring_buffer_expanded)
1950 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1951 					   RING_BUFFER_ALL_CPUS);
1952 		tr->allocated_snapshot = true;
1953 	}
1954 #endif
1955 
1956 	/* the test is responsible for initializing and enabling */
1957 	pr_info("Testing tracer %s: ", type->name);
1958 	ret = type->selftest(type, tr);
1959 	/* the test is responsible for resetting too */
1960 	tr->current_trace = saved_tracer;
1961 	if (ret) {
1962 		printk(KERN_CONT "FAILED!\n");
1963 		/* Add the warning after printing 'FAILED' */
1964 		WARN_ON(1);
1965 		return -1;
1966 	}
1967 	/* Only reset on passing, to avoid touching corrupted buffers */
1968 	tracing_reset_online_cpus(&tr->array_buffer);
1969 
1970 #ifdef CONFIG_TRACER_MAX_TRACE
1971 	if (type->use_max_tr) {
1972 		tr->allocated_snapshot = false;
1973 
1974 		/* Shrink the max buffer again */
1975 		if (ring_buffer_expanded)
1976 			ring_buffer_resize(tr->max_buffer.buffer, 1,
1977 					   RING_BUFFER_ALL_CPUS);
1978 	}
1979 #endif
1980 
1981 	printk(KERN_CONT "PASSED\n");
1982 	return 0;
1983 }
1984 
1985 static __init int init_trace_selftests(void)
1986 {
1987 	struct trace_selftests *p, *n;
1988 	struct tracer *t, **last;
1989 	int ret;
1990 
1991 	selftests_can_run = true;
1992 
1993 	mutex_lock(&trace_types_lock);
1994 
1995 	if (list_empty(&postponed_selftests))
1996 		goto out;
1997 
1998 	pr_info("Running postponed tracer tests:\n");
1999 
2000 	tracing_selftest_running = true;
2001 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2002 		/* This loop can take minutes when sanitizers are enabled, so
2003 		 * let's make sure we allow RCU processing.
2004 		 */
2005 		cond_resched();
2006 		ret = run_tracer_selftest(p->type);
2007 		/* If the test fails, then warn and remove from available_tracers */
2008 		if (ret < 0) {
2009 			WARN(1, "tracer: %s failed selftest, disabling\n",
2010 			     p->type->name);
2011 			last = &trace_types;
2012 			for (t = trace_types; t; t = t->next) {
2013 				if (t == p->type) {
2014 					*last = t->next;
2015 					break;
2016 				}
2017 				last = &t->next;
2018 			}
2019 		}
2020 		list_del(&p->list);
2021 		kfree(p);
2022 	}
2023 	tracing_selftest_running = false;
2024 
2025  out:
2026 	mutex_unlock(&trace_types_lock);
2027 
2028 	return 0;
2029 }
2030 core_initcall(init_trace_selftests);
2031 #else
2032 static inline int run_tracer_selftest(struct tracer *type)
2033 {
2034 	return 0;
2035 }
2036 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2037 
2038 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2039 
2040 static void __init apply_trace_boot_options(void);
2041 
2042 /**
2043  * register_tracer - register a tracer with the ftrace system.
2044  * @type: the plugin for the tracer
2045  *
2046  * Register a new plugin tracer.
2047  */
2048 int __init register_tracer(struct tracer *type)
2049 {
2050 	struct tracer *t;
2051 	int ret = 0;
2052 
2053 	if (!type->name) {
2054 		pr_info("Tracer must have a name\n");
2055 		return -1;
2056 	}
2057 
2058 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2059 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2060 		return -1;
2061 	}
2062 
2063 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2064 		pr_warn("Can not register tracer %s due to lockdown\n",
2065 			   type->name);
2066 		return -EPERM;
2067 	}
2068 
2069 	mutex_lock(&trace_types_lock);
2070 
2071 	tracing_selftest_running = true;
2072 
2073 	for (t = trace_types; t; t = t->next) {
2074 		if (strcmp(type->name, t->name) == 0) {
2075 			/* already found */
2076 			pr_info("Tracer %s already registered\n",
2077 				type->name);
2078 			ret = -1;
2079 			goto out;
2080 		}
2081 	}
2082 
2083 	if (!type->set_flag)
2084 		type->set_flag = &dummy_set_flag;
2085 	if (!type->flags) {
2086 		/* allocate a dummy tracer_flags */
2087 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2088 		if (!type->flags) {
2089 			ret = -ENOMEM;
2090 			goto out;
2091 		}
2092 		type->flags->val = 0;
2093 		type->flags->opts = dummy_tracer_opt;
2094 	} else
2095 		if (!type->flags->opts)
2096 			type->flags->opts = dummy_tracer_opt;
2097 
2098 	/* store the tracer for __set_tracer_option */
2099 	type->flags->trace = type;
2100 
2101 	ret = run_tracer_selftest(type);
2102 	if (ret < 0)
2103 		goto out;
2104 
2105 	type->next = trace_types;
2106 	trace_types = type;
2107 	add_tracer_options(&global_trace, type);
2108 
2109  out:
2110 	tracing_selftest_running = false;
2111 	mutex_unlock(&trace_types_lock);
2112 
2113 	if (ret || !default_bootup_tracer)
2114 		goto out_unlock;
2115 
2116 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2117 		goto out_unlock;
2118 
2119 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2120 	/* Do we want this tracer to start on bootup? */
2121 	tracing_set_tracer(&global_trace, type->name);
2122 	default_bootup_tracer = NULL;
2123 
2124 	apply_trace_boot_options();
2125 
2126 	/* disable other selftests, since this will break them. */
2127 	disable_tracing_selftest("running a tracer");
2128 
2129  out_unlock:
2130 	return ret;
2131 }
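
/*
 * A minimal registration sketch (hypothetical tracer, names assumed;
 * real tracers usually also fill in start/stop/selftest and flag hooks):
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int init_my_tracer(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(init_my_tracer);
 */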
2132 
2133 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2134 {
2135 	struct trace_buffer *buffer = buf->buffer;
2136 
2137 	if (!buffer)
2138 		return;
2139 
2140 	ring_buffer_record_disable(buffer);
2141 
2142 	/* Make sure all commits have finished */
2143 	synchronize_rcu();
2144 	ring_buffer_reset_cpu(buffer, cpu);
2145 
2146 	ring_buffer_record_enable(buffer);
2147 }
2148 
2149 void tracing_reset_online_cpus(struct array_buffer *buf)
2150 {
2151 	struct trace_buffer *buffer = buf->buffer;
2152 
2153 	if (!buffer)
2154 		return;
2155 
2156 	ring_buffer_record_disable(buffer);
2157 
2158 	/* Make sure all commits have finished */
2159 	synchronize_rcu();
2160 
2161 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2162 
2163 	ring_buffer_reset_online_cpus(buffer);
2164 
2165 	ring_buffer_record_enable(buffer);
2166 }
2167 
2168 /* Must have trace_types_lock held */
2169 void tracing_reset_all_online_cpus(void)
2170 {
2171 	struct trace_array *tr;
2172 
2173 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2174 		if (!tr->clear_trace)
2175 			continue;
2176 		tr->clear_trace = false;
2177 		tracing_reset_online_cpus(&tr->array_buffer);
2178 #ifdef CONFIG_TRACER_MAX_TRACE
2179 		tracing_reset_online_cpus(&tr->max_buffer);
2180 #endif
2181 	}
2182 }
2183 
2184 /*
2185  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2186  * is the tgid last observed corresponding to pid=i.
2187  */
2188 static int *tgid_map;
2189 
2190 /* The maximum valid index into tgid_map. */
2191 static size_t tgid_map_max;
2192 
2193 #define SAVED_CMDLINES_DEFAULT 128
2194 #define NO_CMDLINE_MAP UINT_MAX
2195 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2196 struct saved_cmdlines_buffer {
2197 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2198 	unsigned *map_cmdline_to_pid;
2199 	unsigned cmdline_num;
2200 	int cmdline_idx;
2201 	char *saved_cmdlines;
2202 };
2203 static struct saved_cmdlines_buffer *savedcmd;
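
/*
 * The lookup is two-level: map_pid_to_cmdline[] is indexed by the low
 * bits of the pid and yields a slot index, saved_cmdlines[] stores one
 * TASK_COMM_LEN comm per slot, and map_cmdline_to_pid[] remembers which
 * pid currently owns each slot so that stale hash collisions can be
 * detected on lookup.
 */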
2204 
2205 static inline char *get_saved_cmdlines(int idx)
2206 {
2207 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2208 }
2209 
2210 static inline void set_cmdline(int idx, const char *cmdline)
2211 {
2212 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2213 }
2214 
2215 static int allocate_cmdlines_buffer(unsigned int val,
2216 				    struct saved_cmdlines_buffer *s)
2217 {
2218 	s->map_cmdline_to_pid = kmalloc_array(val,
2219 					      sizeof(*s->map_cmdline_to_pid),
2220 					      GFP_KERNEL);
2221 	if (!s->map_cmdline_to_pid)
2222 		return -ENOMEM;
2223 
2224 	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2225 	if (!s->saved_cmdlines) {
2226 		kfree(s->map_cmdline_to_pid);
2227 		return -ENOMEM;
2228 	}
2229 
2230 	s->cmdline_idx = 0;
2231 	s->cmdline_num = val;
2232 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2233 	       sizeof(s->map_pid_to_cmdline));
2234 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2235 	       val * sizeof(*s->map_cmdline_to_pid));
2236 
2237 	return 0;
2238 }
2239 
2240 static int trace_create_savedcmd(void)
2241 {
2242 	int ret;
2243 
2244 	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2245 	if (!savedcmd)
2246 		return -ENOMEM;
2247 
2248 	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2249 	if (ret < 0) {
2250 		kfree(savedcmd);
2251 		savedcmd = NULL;
2252 		return -ENOMEM;
2253 	}
2254 
2255 	return 0;
2256 }
2257 
2258 int is_tracing_stopped(void)
2259 {
2260 	return global_trace.stop_count;
2261 }
2262 
2263 /**
2264  * tracing_start - quick start of the tracer
2265  *
2266  * If tracing is enabled but was stopped by tracing_stop,
2267  * this will start the tracer back up.
2268  */
2269 void tracing_start(void)
2270 {
2271 	struct trace_buffer *buffer;
2272 	unsigned long flags;
2273 
2274 	if (tracing_disabled)
2275 		return;
2276 
2277 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2278 	if (--global_trace.stop_count) {
2279 		if (global_trace.stop_count < 0) {
2280 			/* Someone screwed up their debugging */
2281 			WARN_ON_ONCE(1);
2282 			global_trace.stop_count = 0;
2283 		}
2284 		goto out;
2285 	}
2286 
2287 	/* Prevent the buffers from switching */
2288 	arch_spin_lock(&global_trace.max_lock);
2289 
2290 	buffer = global_trace.array_buffer.buffer;
2291 	if (buffer)
2292 		ring_buffer_record_enable(buffer);
2293 
2294 #ifdef CONFIG_TRACER_MAX_TRACE
2295 	buffer = global_trace.max_buffer.buffer;
2296 	if (buffer)
2297 		ring_buffer_record_enable(buffer);
2298 #endif
2299 
2300 	arch_spin_unlock(&global_trace.max_lock);
2301 
2302  out:
2303 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2304 }
2305 
2306 static void tracing_start_tr(struct trace_array *tr)
2307 {
2308 	struct trace_buffer *buffer;
2309 	unsigned long flags;
2310 
2311 	if (tracing_disabled)
2312 		return;
2313 
2314 	/* If global, we need to also start the max tracer */
2315 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2316 		return tracing_start();
2317 
2318 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2319 
2320 	if (--tr->stop_count) {
2321 		if (tr->stop_count < 0) {
2322 			/* Someone screwed up their debugging */
2323 			WARN_ON_ONCE(1);
2324 			tr->stop_count = 0;
2325 		}
2326 		goto out;
2327 	}
2328 
2329 	buffer = tr->array_buffer.buffer;
2330 	if (buffer)
2331 		ring_buffer_record_enable(buffer);
2332 
2333  out:
2334 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2335 }
2336 
2337 /**
2338  * tracing_stop - quick stop of the tracer
2339  *
2340  * Lightweight way to stop tracing. Use in conjunction with
2341  * tracing_start.
2342  */
2343 void tracing_stop(void)
2344 {
2345 	struct trace_buffer *buffer;
2346 	unsigned long flags;
2347 
2348 	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2349 	if (global_trace.stop_count++)
2350 		goto out;
2351 
2352 	/* Prevent the buffers from switching */
2353 	arch_spin_lock(&global_trace.max_lock);
2354 
2355 	buffer = global_trace.array_buffer.buffer;
2356 	if (buffer)
2357 		ring_buffer_record_disable(buffer);
2358 
2359 #ifdef CONFIG_TRACER_MAX_TRACE
2360 	buffer = global_trace.max_buffer.buffer;
2361 	if (buffer)
2362 		ring_buffer_record_disable(buffer);
2363 #endif
2364 
2365 	arch_spin_unlock(&global_trace.max_lock);
2366 
2367  out:
2368 	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2369 }
2370 
2371 static void tracing_stop_tr(struct trace_array *tr)
2372 {
2373 	struct trace_buffer *buffer;
2374 	unsigned long flags;
2375 
2376 	/* If global, we need to also stop the max tracer */
2377 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2378 		return tracing_stop();
2379 
2380 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2381 	if (tr->stop_count++)
2382 		goto out;
2383 
2384 	buffer = tr->array_buffer.buffer;
2385 	if (buffer)
2386 		ring_buffer_record_disable(buffer);
2387 
2388  out:
2389 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2390 }
2391 
2392 static int trace_save_cmdline(struct task_struct *tsk)
2393 {
2394 	unsigned tpid, idx;
2395 
2396 	/* treat recording of idle task as a success */
2397 	if (!tsk->pid)
2398 		return 1;
2399 
2400 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2401 
2402 	/*
2403 	 * It's not the end of the world if we don't get
2404 	 * the lock, but we also don't want to spin
2405 	 * nor do we want to disable interrupts,
2406 	 * so if we miss here, then better luck next time.
2407 	 */
2408 	if (!arch_spin_trylock(&trace_cmdline_lock))
2409 		return 0;
2410 
2411 	idx = savedcmd->map_pid_to_cmdline[tpid];
2412 	if (idx == NO_CMDLINE_MAP) {
2413 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2414 
2415 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2416 		savedcmd->cmdline_idx = idx;
2417 	}
2418 
2419 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2420 	set_cmdline(idx, tsk->comm);
2421 
2422 	arch_spin_unlock(&trace_cmdline_lock);
2423 
2424 	return 1;
2425 }
2426 
2427 static void __trace_find_cmdline(int pid, char comm[])
2428 {
2429 	unsigned map;
2430 	int tpid;
2431 
2432 	if (!pid) {
2433 		strcpy(comm, "<idle>");
2434 		return;
2435 	}
2436 
2437 	if (WARN_ON_ONCE(pid < 0)) {
2438 		strcpy(comm, "<XXX>");
2439 		return;
2440 	}
2441 
2442 	tpid = pid & (PID_MAX_DEFAULT - 1);
2443 	map = savedcmd->map_pid_to_cmdline[tpid];
2444 	if (map != NO_CMDLINE_MAP) {
2445 		tpid = savedcmd->map_cmdline_to_pid[map];
2446 		if (tpid == pid) {
2447 			strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2448 			return;
2449 		}
2450 	}
2451 	strcpy(comm, "<...>");
2452 }
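
/*
 * "<...>" is reported when the comm is unknown: either this pid was
 * never recorded, or its hash slot has since been reused by another
 * task, so map_cmdline_to_pid[] no longer matches the requested pid.
 */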
2453 
2454 void trace_find_cmdline(int pid, char comm[])
2455 {
2456 	preempt_disable();
2457 	arch_spin_lock(&trace_cmdline_lock);
2458 
2459 	__trace_find_cmdline(pid, comm);
2460 
2461 	arch_spin_unlock(&trace_cmdline_lock);
2462 	preempt_enable();
2463 }
2464 
2465 static int *trace_find_tgid_ptr(int pid)
2466 {
2467 	/*
2468 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2469 	 * if we observe a non-NULL tgid_map then we also observe the correct
2470 	 * tgid_map_max.
2471 	 */
2472 	int *map = smp_load_acquire(&tgid_map);
2473 
2474 	if (unlikely(!map || pid > tgid_map_max))
2475 		return NULL;
2476 
2477 	return &map[pid];
2478 }
2479 
2480 int trace_find_tgid(int pid)
2481 {
2482 	int *ptr = trace_find_tgid_ptr(pid);
2483 
2484 	return ptr ? *ptr : 0;
2485 }
2486 
2487 static int trace_save_tgid(struct task_struct *tsk)
2488 {
2489 	int *ptr;
2490 
2491 	/* treat recording of idle task as a success */
2492 	if (!tsk->pid)
2493 		return 1;
2494 
2495 	ptr = trace_find_tgid_ptr(tsk->pid);
2496 	if (!ptr)
2497 		return 0;
2498 
2499 	*ptr = tsk->tgid;
2500 	return 1;
2501 }
2502 
2503 static bool tracing_record_taskinfo_skip(int flags)
2504 {
2505 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2506 		return true;
2507 	if (!__this_cpu_read(trace_taskinfo_save))
2508 		return true;
2509 	return false;
2510 }
2511 
2512 /**
2513  * tracing_record_taskinfo - record the task info of a task
2514  *
2515  * @task:  task to record
2516  * @flags: TRACE_RECORD_CMDLINE for recording comm
2517  *         TRACE_RECORD_TGID for recording tgid
2518  */
2519 void tracing_record_taskinfo(struct task_struct *task, int flags)
2520 {
2521 	bool done;
2522 
2523 	if (tracing_record_taskinfo_skip(flags))
2524 		return;
2525 
2526 	/*
2527 	 * Record as much task information as possible. If some fail, continue
2528 	 * to try to record the others.
2529 	 */
2530 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2531 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2532 
2533 	/* If recording any information failed, retry again soon. */
2534 	if (!done)
2535 		return;
2536 
2537 	__this_cpu_write(trace_taskinfo_save, false);
2538 }
2539 
2540 /**
2541  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2542  *
2543  * @prev: previous task during sched_switch
2544  * @next: next task during sched_switch
2545  * @flags: TRACE_RECORD_CMDLINE for recording comm
2546  *         TRACE_RECORD_TGID for recording tgid
2547  */
2548 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2549 					  struct task_struct *next, int flags)
2550 {
2551 	bool done;
2552 
2553 	if (tracing_record_taskinfo_skip(flags))
2554 		return;
2555 
2556 	/*
2557 	 * Record as much task information as possible. If some fail, continue
2558 	 * to try to record the others.
2559 	 */
2560 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2561 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2562 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2563 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2564 
2565 	/* If recording any information failed, retry again soon. */
2566 	if (!done)
2567 		return;
2568 
2569 	__this_cpu_write(trace_taskinfo_save, false);
2570 }
2571 
2572 /* Helpers to record a specific task information */
2573 void tracing_record_cmdline(struct task_struct *task)
2574 {
2575 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2576 }
2577 
2578 void tracing_record_tgid(struct task_struct *task)
2579 {
2580 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2581 }
2582 
2583 /*
2584  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2585  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2586  * simplifies those functions and keeps them in sync.
2587  */
2588 enum print_line_t trace_handle_return(struct trace_seq *s)
2589 {
2590 	return trace_seq_has_overflowed(s) ?
2591 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2592 }
2593 EXPORT_SYMBOL_GPL(trace_handle_return);
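
/*
 * Typical use in an event's output callback (sketch only, the callback
 * name is assumed):
 *
 *	static enum print_line_t my_event_print(struct trace_iterator *iter,
 *						int flags, struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "my event fired\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */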
2594 
2595 void
2596 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2597 			     unsigned long flags, int pc)
2598 {
2599 	struct task_struct *tsk = current;
2600 
2601 	entry->preempt_count		= pc & 0xff;
2602 	entry->pid			= (tsk) ? tsk->pid : 0;
2603 	entry->type			= type;
2604 	entry->flags =
2605 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2606 		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2607 #else
2608 		TRACE_FLAG_IRQS_NOSUPPORT |
2609 #endif
2610 		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2611 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2612 		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2613 		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2614 		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2615 }
2616 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
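
/*
 * The result is a compact per-event header: the low byte of the preempt
 * count in entry->preempt_count, and the irq/NMI/softirq/need-resched
 * state packed into entry->flags as TRACE_FLAG_* bits, which the output
 * code later renders as the latency-format columns.
 */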
2617 
2618 struct ring_buffer_event *
2619 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2620 			  int type,
2621 			  unsigned long len,
2622 			  unsigned long flags, int pc)
2623 {
2624 	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2625 }
2626 
2627 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2628 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2629 static int trace_buffered_event_ref;
2630 
2631 /**
2632  * trace_buffered_event_enable - enable buffering events
2633  *
2634  * When events are being filtered, it is quicker to use a temporary
2635  * buffer to write the event data into if there's a likely chance
2636  * that it will not be committed. Discarding an event from the ring
2637  * buffer is not as fast as committing it, and is much slower than
2638  * copying the data into a temporary buffer and committing that.
2639  *
2640  * When an event is to be filtered, allocate per cpu buffers to
2641  * write the event data into, and if the event is filtered and discarded
2642  * it is simply dropped, otherwise, the entire data is to be committed
2643  * in one shot.
2644  */
2645 void trace_buffered_event_enable(void)
2646 {
2647 	struct ring_buffer_event *event;
2648 	struct page *page;
2649 	int cpu;
2650 
2651 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2652 
2653 	if (trace_buffered_event_ref++)
2654 		return;
2655 
2656 	for_each_tracing_cpu(cpu) {
2657 		page = alloc_pages_node(cpu_to_node(cpu),
2658 					GFP_KERNEL | __GFP_NORETRY, 0);
2659 		if (!page)
2660 			goto failed;
2661 
2662 		event = page_address(page);
2663 		memset(event, 0, sizeof(*event));
2664 
2665 		per_cpu(trace_buffered_event, cpu) = event;
2666 
2667 		preempt_disable();
2668 		if (cpu == smp_processor_id() &&
2669 		    __this_cpu_read(trace_buffered_event) !=
2670 		    per_cpu(trace_buffered_event, cpu))
2671 			WARN_ON_ONCE(1);
2672 		preempt_enable();
2673 	}
2674 
2675 	return;
2676  failed:
2677 	trace_buffered_event_disable();
2678 }
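
/*
 * Sketch of the expected pairing (callers hold event_mutex, as the
 * WARN_ON_ONCE() above and in trace_buffered_event_disable() enforce):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install the filter ...
 *	mutex_unlock(&event_mutex);
 *
 * with a matching trace_buffered_event_disable() when the filter is
 * removed.
 */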
2679 
2680 static void enable_trace_buffered_event(void *data)
2681 {
2682 	/* Probably not needed, but do it anyway */
2683 	smp_rmb();
2684 	this_cpu_dec(trace_buffered_event_cnt);
2685 }
2686 
2687 static void disable_trace_buffered_event(void *data)
2688 {
2689 	this_cpu_inc(trace_buffered_event_cnt);
2690 }
2691 
2692 /**
2693  * trace_buffered_event_disable - disable buffering events
2694  *
2695  * When a filter is removed, it is faster to not use the buffered
2696  * events, and to commit directly into the ring buffer. Free up
2697  * the temp buffers when there are no more users. This requires
2698  * special synchronization with current events.
2699  */
2700 void trace_buffered_event_disable(void)
2701 {
2702 	int cpu;
2703 
2704 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2705 
2706 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2707 		return;
2708 
2709 	if (--trace_buffered_event_ref)
2710 		return;
2711 
2712 	preempt_disable();
2713 	/* For each CPU, set the buffer as used. */
2714 	smp_call_function_many(tracing_buffer_mask,
2715 			       disable_trace_buffered_event, NULL, 1);
2716 	preempt_enable();
2717 
2718 	/* Wait for all current users to finish */
2719 	synchronize_rcu();
2720 
2721 	for_each_tracing_cpu(cpu) {
2722 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2723 		per_cpu(trace_buffered_event, cpu) = NULL;
2724 	}
2725 	/*
2726 	 * Make sure trace_buffered_event is NULL before clearing
2727 	 * trace_buffered_event_cnt.
2728 	 */
2729 	smp_wmb();
2730 
2731 	preempt_disable();
2732 	/* Do the work on each cpu */
2733 	smp_call_function_many(tracing_buffer_mask,
2734 			       enable_trace_buffered_event, NULL, 1);
2735 	preempt_enable();
2736 }
2737 
2738 static struct trace_buffer *temp_buffer;
2739 
2740 struct ring_buffer_event *
2741 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2742 			  struct trace_event_file *trace_file,
2743 			  int type, unsigned long len,
2744 			  unsigned long flags, int pc)
2745 {
2746 	struct ring_buffer_event *entry;
2747 	int val;
2748 
2749 	*current_rb = trace_file->tr->array_buffer.buffer;
2750 
2751 	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2752 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2753 	    (entry = this_cpu_read(trace_buffered_event))) {
2754 		/* Try to use the per cpu buffer first */
2755 		val = this_cpu_inc_return(trace_buffered_event_cnt);
2756 		if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2757 			trace_event_setup(entry, type, flags, pc);
2758 			entry->array[0] = len;
2759 			return entry;
2760 		}
2761 		this_cpu_dec(trace_buffered_event_cnt);
2762 	}
2763 
2764 	entry = __trace_buffer_lock_reserve(*current_rb,
2765 					    type, len, flags, pc);
2766 	/*
2767 	 * If tracing is off, but we have triggers enabled
2768 	 * we still need to look at the event data. Use the temp_buffer
2769 	 * to store the trace event for the trigger to use. It's recursion
2770 	 * safe and will not be recorded anywhere.
2771 	 */
2772 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2773 		*current_rb = temp_buffer;
2774 		entry = __trace_buffer_lock_reserve(*current_rb,
2775 						    type, len, flags, pc);
2776 	}
2777 	return entry;
2778 }
2779 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
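
/*
 * Reservation order used above: first the per-cpu buffered event (only
 * when the event may be filtered or is soft disabled and absolute
 * timestamps are not in use), then the instance's ring buffer, and
 * finally temp_buffer when the ring buffer is off but a trigger still
 * needs to see the event data.
 */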
2780 
2781 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2782 static DEFINE_MUTEX(tracepoint_printk_mutex);
2783 
2784 static void output_printk(struct trace_event_buffer *fbuffer)
2785 {
2786 	struct trace_event_call *event_call;
2787 	struct trace_event_file *file;
2788 	struct trace_event *event;
2789 	unsigned long flags;
2790 	struct trace_iterator *iter = tracepoint_print_iter;
2791 
2792 	/* We should never get here if iter is NULL */
2793 	if (WARN_ON_ONCE(!iter))
2794 		return;
2795 
2796 	event_call = fbuffer->trace_file->event_call;
2797 	if (!event_call || !event_call->event.funcs ||
2798 	    !event_call->event.funcs->trace)
2799 		return;
2800 
2801 	file = fbuffer->trace_file;
2802 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2803 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2804 	     !filter_match_preds(file->filter, fbuffer->entry)))
2805 		return;
2806 
2807 	event = &fbuffer->trace_file->event_call->event;
2808 
2809 	spin_lock_irqsave(&tracepoint_iter_lock, flags);
2810 	trace_seq_init(&iter->seq);
2811 	iter->ent = fbuffer->entry;
2812 	event_call->event.funcs->trace(iter, 0, event);
2813 	trace_seq_putc(&iter->seq, 0);
2814 	printk("%s", iter->seq.buffer);
2815 
2816 	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2817 }
2818 
2819 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2820 			     void *buffer, size_t *lenp,
2821 			     loff_t *ppos)
2822 {
2823 	int save_tracepoint_printk;
2824 	int ret;
2825 
2826 	mutex_lock(&tracepoint_printk_mutex);
2827 	save_tracepoint_printk = tracepoint_printk;
2828 
2829 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2830 
2831 	/*
2832 	 * This will force exiting early, as tracepoint_printk
2833 	 * is always zero when tracepoint_print_iter is not allocated.
2834 	 */
2835 	if (!tracepoint_print_iter)
2836 		tracepoint_printk = 0;
2837 
2838 	if (save_tracepoint_printk == tracepoint_printk)
2839 		goto out;
2840 
2841 	if (tracepoint_printk)
2842 		static_key_enable(&tracepoint_printk_key.key);
2843 	else
2844 		static_key_disable(&tracepoint_printk_key.key);
2845 
2846  out:
2847 	mutex_unlock(&tracepoint_printk_mutex);
2848 
2849 	return ret;
2850 }
2851 
2852 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2853 {
2854 	if (static_key_false(&tracepoint_printk_key.key))
2855 		output_printk(fbuffer);
2856 
2857 	if (static_branch_unlikely(&trace_event_exports_enabled))
2858 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2859 	event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2860 				    fbuffer->event, fbuffer->entry,
2861 				    fbuffer->flags, fbuffer->pc, fbuffer->regs);
2862 }
2863 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2864 
2865 /*
2866  * Skip 3:
2867  *
2868  *   trace_buffer_unlock_commit_regs()
2869  *   trace_event_buffer_commit()
2870  *   trace_event_raw_event_xxx()
2871  */
2872 # define STACK_SKIP 3
2873 
2874 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2875 				     struct trace_buffer *buffer,
2876 				     struct ring_buffer_event *event,
2877 				     unsigned long flags, int pc,
2878 				     struct pt_regs *regs)
2879 {
2880 	__buffer_unlock_commit(buffer, event);
2881 
2882 	/*
2883 	 * If regs is not set, then skip the necessary functions.
2884 	 * Note, we can still get here via blktrace, wakeup tracer
2885 	 * and mmiotrace, but that's ok if they lose a function or
2886 	 * two. They are not that meaningful.
2887 	 */
2888 	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2889 	ftrace_trace_userstack(tr, buffer, flags, pc);
2890 }
2891 
2892 /*
2893  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2894  */
2895 void
2896 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2897 				   struct ring_buffer_event *event)
2898 {
2899 	__buffer_unlock_commit(buffer, event);
2900 }
2901 
2902 void
2903 trace_function(struct trace_array *tr,
2904 	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
2905 	       int pc)
2906 {
2907 	struct trace_event_call *call = &event_function;
2908 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2909 	struct ring_buffer_event *event;
2910 	struct ftrace_entry *entry;
2911 
2912 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2913 					    flags, pc);
2914 	if (!event)
2915 		return;
2916 	entry	= ring_buffer_event_data(event);
2917 	entry->ip			= ip;
2918 	entry->parent_ip		= parent_ip;
2919 
2920 	if (!call_filter_check_discard(call, entry, buffer, event)) {
2921 		if (static_branch_unlikely(&trace_function_exports_enabled))
2922 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2923 		__buffer_unlock_commit(buffer, event);
2924 	}
2925 }
2926 
2927 #ifdef CONFIG_STACKTRACE
2928 
2929 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2930 #define FTRACE_KSTACK_NESTING	4
2931 
2932 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
2933 
2934 struct ftrace_stack {
2935 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2936 };
2937 
2938 
2939 struct ftrace_stacks {
2940 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2941 };
2942 
2943 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2944 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2945 
2946 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2947 				 unsigned long flags,
2948 				 int skip, int pc, struct pt_regs *regs)
2949 {
2950 	struct trace_event_call *call = &event_kernel_stack;
2951 	struct ring_buffer_event *event;
2952 	unsigned int size, nr_entries;
2953 	struct ftrace_stack *fstack;
2954 	struct stack_entry *entry;
2955 	int stackidx;
2956 
2957 	/*
2958 	 * Add one, for this function and the call to stack_trace_save().
2959 	 * If regs is set, then these functions will not be in the way.
2960 	 */
2961 #ifndef CONFIG_UNWINDER_ORC
2962 	if (!regs)
2963 		skip++;
2964 #endif
2965 
2966 	preempt_disable_notrace();
2967 
2968 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2969 
2970 	/* This should never happen. If it does, yell once and skip */
2971 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2972 		goto out;
2973 
2974 	/*
2975 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2976 	 * interrupt will either see the value pre increment or post
2977 	 * increment. If the interrupt happens pre increment it will have
2978 	 * restored the counter when it returns.  We just need a barrier to
2979 	 * keep gcc from moving things around.
2980 	 */
2981 	barrier();
2982 
2983 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2984 	size = ARRAY_SIZE(fstack->calls);
2985 
2986 	if (regs) {
2987 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2988 						   size, skip);
2989 	} else {
2990 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2991 	}
2992 
2993 	size = nr_entries * sizeof(unsigned long);
2994 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2995 				    (sizeof(*entry) - sizeof(entry->caller)) + size,
2996 				    flags, pc);
2997 	if (!event)
2998 		goto out;
2999 	entry = ring_buffer_event_data(event);
3000 
3001 	memcpy(&entry->caller, fstack->calls, size);
3002 	entry->size = nr_entries;
3003 
3004 	if (!call_filter_check_discard(call, entry, buffer, event))
3005 		__buffer_unlock_commit(buffer, event);
3006 
3007  out:
3008 	/* Again, don't let gcc optimize things here */
3009 	barrier();
3010 	__this_cpu_dec(ftrace_stack_reserve);
3011 	preempt_enable_notrace();
3012 
3013 }
3014 
3015 static inline void ftrace_trace_stack(struct trace_array *tr,
3016 				      struct trace_buffer *buffer,
3017 				      unsigned long flags,
3018 				      int skip, int pc, struct pt_regs *regs)
3019 {
3020 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3021 		return;
3022 
3023 	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
3024 }
3025 
3026 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3027 		   int pc)
3028 {
3029 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3030 
3031 	if (rcu_is_watching()) {
3032 		__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3033 		return;
3034 	}
3035 
3036 	/*
3037 	 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3038 	 * but if the above rcu_is_watching() failed, then the NMI
3039 	 * triggered someplace critical, and rcu_irq_enter() should
3040 	 * not be called from NMI.
3041 	 */
3042 	if (unlikely(in_nmi()))
3043 		return;
3044 
3045 	rcu_irq_enter_irqson();
3046 	__ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3047 	rcu_irq_exit_irqson();
3048 }
3049 
3050 /**
3051  * trace_dump_stack - record a stack back trace in the trace buffer
3052  * @skip: Number of functions to skip (helper handlers)
3053  */
3054 void trace_dump_stack(int skip)
3055 {
3056 	unsigned long flags;
3057 
3058 	if (tracing_disabled || tracing_selftest_running)
3059 		return;
3060 
3061 	local_save_flags(flags);
3062 
3063 #ifndef CONFIG_UNWINDER_ORC
3064 	/* Skip 1 to skip this function. */
3065 	skip++;
3066 #endif
3067 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3068 			     flags, skip, preempt_count(), NULL);
3069 }
3070 EXPORT_SYMBOL_GPL(trace_dump_stack);
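
/*
 * Example (illustrative): record the current backtrace into the
 * top-level trace buffer from a code path being debugged:
 *
 *	trace_dump_stack(0);
 */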
3071 
3072 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3073 static DEFINE_PER_CPU(int, user_stack_count);
3074 
3075 static void
3076 ftrace_trace_userstack(struct trace_array *tr,
3077 		       struct trace_buffer *buffer, unsigned long flags, int pc)
3078 {
3079 	struct trace_event_call *call = &event_user_stack;
3080 	struct ring_buffer_event *event;
3081 	struct userstack_entry *entry;
3082 
3083 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3084 		return;
3085 
3086 	/*
3087 	 * NMIs cannot handle page faults, even with fixups.
3088 	 * The save user stack can (and often does) fault.
3089 	 */
3090 	if (unlikely(in_nmi()))
3091 		return;
3092 
3093 	/*
3094 	 * prevent recursion, since the user stack tracing may
3095 	 * trigger other kernel events.
3096 	 */
3097 	preempt_disable();
3098 	if (__this_cpu_read(user_stack_count))
3099 		goto out;
3100 
3101 	__this_cpu_inc(user_stack_count);
3102 
3103 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3104 					    sizeof(*entry), flags, pc);
3105 	if (!event)
3106 		goto out_drop_count;
3107 	entry	= ring_buffer_event_data(event);
3108 
3109 	entry->tgid		= current->tgid;
3110 	memset(&entry->caller, 0, sizeof(entry->caller));
3111 
3112 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3113 	if (!call_filter_check_discard(call, entry, buffer, event))
3114 		__buffer_unlock_commit(buffer, event);
3115 
3116  out_drop_count:
3117 	__this_cpu_dec(user_stack_count);
3118  out:
3119 	preempt_enable();
3120 }
3121 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3122 static void ftrace_trace_userstack(struct trace_array *tr,
3123 				   struct trace_buffer *buffer,
3124 				   unsigned long flags, int pc)
3125 {
3126 }
3127 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3128 
3129 #endif /* CONFIG_STACKTRACE */
3130 
3131 /* created for use with alloc_percpu */
3132 struct trace_buffer_struct {
3133 	int nesting;
3134 	char buffer[4][TRACE_BUF_SIZE];
3135 };
3136 
3137 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3138 
3139 /*
3140  * This allows for lockless recording.  If we're nested too deeply, then
3141  * this returns NULL.
3142  */
3143 static char *get_trace_buf(void)
3144 {
3145 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3146 
3147 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3148 		return NULL;
3149 
3150 	buffer->nesting++;
3151 
3152 	/* Interrupts must see nesting incremented before we use the buffer */
3153 	barrier();
3154 	return &buffer->buffer[buffer->nesting - 1][0];
3155 }
3156 
3157 static void put_trace_buf(void)
3158 {
3159 	/* Don't let the decrement of nesting leak before this */
3160 	barrier();
3161 	this_cpu_dec(trace_percpu_buffer->nesting);
3162 }
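
/*
 * get_trace_buf()/put_trace_buf() must be paired, with preemption
 * disabled across the pair (as trace_vbprintk() and
 * __trace_array_vprintk() below do); the per-cpu nesting counter is
 * what makes re-entry from irq and NMI context safe.
 */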
3163 
3164 static int alloc_percpu_trace_buffer(void)
3165 {
3166 	struct trace_buffer_struct __percpu *buffers;
3167 
3168 	if (trace_percpu_buffer)
3169 		return 0;
3170 
3171 	buffers = alloc_percpu(struct trace_buffer_struct);
3172 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3173 		return -ENOMEM;
3174 
3175 	trace_percpu_buffer = buffers;
3176 	return 0;
3177 }
3178 
3179 static int buffers_allocated;
3180 
3181 void trace_printk_init_buffers(void)
3182 {
3183 	if (buffers_allocated)
3184 		return;
3185 
3186 	if (alloc_percpu_trace_buffer())
3187 		return;
3188 
3189 	/* trace_printk() is for debug use only. Don't use it in production. */
3190 
3191 	pr_warn("\n");
3192 	pr_warn("**********************************************************\n");
3193 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3194 	pr_warn("**                                                      **\n");
3195 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3196 	pr_warn("**                                                      **\n");
3197 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3198 	pr_warn("** unsafe for production use.                           **\n");
3199 	pr_warn("**                                                      **\n");
3200 	pr_warn("** If you see this message and you are not debugging    **\n");
3201 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3202 	pr_warn("**                                                      **\n");
3203 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3204 	pr_warn("**********************************************************\n");
3205 
3206 	/* Expand the buffers to set size */
3207 	tracing_update_buffers();
3208 
3209 	buffers_allocated = 1;
3210 
3211 	/*
3212 	 * trace_printk_init_buffers() can be called by modules.
3213 	 * If that happens, then we need to start cmdline recording
3214 	 * directly here. If the global_trace.buffer is already
3215 	 * allocated here, then this was called by module code.
3216 	 */
3217 	if (global_trace.array_buffer.buffer)
3218 		tracing_start_cmdline_record();
3219 }
3220 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3221 
3222 void trace_printk_start_comm(void)
3223 {
3224 	/* Start tracing comms if trace printk is set */
3225 	if (!buffers_allocated)
3226 		return;
3227 	tracing_start_cmdline_record();
3228 }
3229 
3230 static void trace_printk_start_stop_comm(int enabled)
3231 {
3232 	if (!buffers_allocated)
3233 		return;
3234 
3235 	if (enabled)
3236 		tracing_start_cmdline_record();
3237 	else
3238 		tracing_stop_cmdline_record();
3239 }
3240 
3241 /**
3242  * trace_vbprintk - write binary msg to tracing buffer
3243  * @ip:    The address of the caller
3244  * @fmt:   The string format to write to the buffer
3245  * @args:  Arguments for @fmt
3246  */
3247 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3248 {
3249 	struct trace_event_call *call = &event_bprint;
3250 	struct ring_buffer_event *event;
3251 	struct trace_buffer *buffer;
3252 	struct trace_array *tr = &global_trace;
3253 	struct bprint_entry *entry;
3254 	unsigned long flags;
3255 	char *tbuffer;
3256 	int len = 0, size, pc;
3257 
3258 	if (unlikely(tracing_selftest_running || tracing_disabled))
3259 		return 0;
3260 
3261 	/* Don't pollute graph traces with trace_vprintk internals */
3262 	pause_graph_tracing();
3263 
3264 	pc = preempt_count();
3265 	preempt_disable_notrace();
3266 
3267 	tbuffer = get_trace_buf();
3268 	if (!tbuffer) {
3269 		len = 0;
3270 		goto out_nobuffer;
3271 	}
3272 
3273 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3274 
3275 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3276 		goto out_put;
3277 
3278 	local_save_flags(flags);
3279 	size = sizeof(*entry) + sizeof(u32) * len;
3280 	buffer = tr->array_buffer.buffer;
3281 	ring_buffer_nest_start(buffer);
3282 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3283 					    flags, pc);
3284 	if (!event)
3285 		goto out;
3286 	entry = ring_buffer_event_data(event);
3287 	entry->ip			= ip;
3288 	entry->fmt			= fmt;
3289 
3290 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3291 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3292 		__buffer_unlock_commit(buffer, event);
3293 		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3294 	}
3295 
3296 out:
3297 	ring_buffer_nest_end(buffer);
3298 out_put:
3299 	put_trace_buf();
3300 
3301 out_nobuffer:
3302 	preempt_enable_notrace();
3303 	unpause_graph_tracing();
3304 
3305 	return len;
3306 }
3307 EXPORT_SYMBOL_GPL(trace_vbprintk);
3308 
3309 __printf(3, 0)
3310 static int
3311 __trace_array_vprintk(struct trace_buffer *buffer,
3312 		      unsigned long ip, const char *fmt, va_list args)
3313 {
3314 	struct trace_event_call *call = &event_print;
3315 	struct ring_buffer_event *event;
3316 	int len = 0, size, pc;
3317 	struct print_entry *entry;
3318 	unsigned long flags;
3319 	char *tbuffer;
3320 
3321 	if (tracing_disabled || tracing_selftest_running)
3322 		return 0;
3323 
3324 	/* Don't pollute graph traces with trace_vprintk internals */
3325 	pause_graph_tracing();
3326 
3327 	pc = preempt_count();
3328 	preempt_disable_notrace();
3329 
3330 
3331 	tbuffer = get_trace_buf();
3332 	if (!tbuffer) {
3333 		len = 0;
3334 		goto out_nobuffer;
3335 	}
3336 
3337 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3338 
3339 	local_save_flags(flags);
3340 	size = sizeof(*entry) + len + 1;
3341 	ring_buffer_nest_start(buffer);
3342 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3343 					    flags, pc);
3344 	if (!event)
3345 		goto out;
3346 	entry = ring_buffer_event_data(event);
3347 	entry->ip = ip;
3348 
3349 	memcpy(&entry->buf, tbuffer, len + 1);
3350 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3351 		__buffer_unlock_commit(buffer, event);
3352 		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3353 	}
3354 
3355 out:
3356 	ring_buffer_nest_end(buffer);
3357 	put_trace_buf();
3358 
3359 out_nobuffer:
3360 	preempt_enable_notrace();
3361 	unpause_graph_tracing();
3362 
3363 	return len;
3364 }
3365 
3366 __printf(3, 0)
3367 int trace_array_vprintk(struct trace_array *tr,
3368 			unsigned long ip, const char *fmt, va_list args)
3369 {
3370 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3371 }
3372 
3373 /**
3374  * trace_array_printk - Print a message to a specific instance
3375  * @tr: The instance trace_array descriptor
3376  * @ip: The instruction pointer that this is called from.
3377  * @fmt: The format to print (printf format)
3378  *
3379  * If a subsystem sets up its own instance, they have the right to
3380  * printk strings into their tracing instance buffer using this
3381  * function. Note, this function will not write into the top level
3382  * buffer (use trace_printk() for that), as writing into the top level
3383  * buffer should only have events that can be individually disabled.
3384  * trace_printk() is only used for debugging a kernel, and should never
3385  * be incorporated into normal use.
3386  *
3387  * trace_array_printk() can be used, as it will not add noise to the
3388  * top level tracing buffer.
3389  *
3390  * Note, trace_array_init_printk() must be called on @tr before this
3391  * can be used.
3392  */
3393 __printf(3, 0)
3394 int trace_array_printk(struct trace_array *tr,
3395 		       unsigned long ip, const char *fmt, ...)
3396 {
3397 	int ret;
3398 	va_list ap;
3399 
3400 	if (!tr)
3401 		return -ENOENT;
3402 
3403 	/* This is only allowed for created instances */
3404 	if (tr == &global_trace)
3405 		return 0;
3406 
3407 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3408 		return 0;
3409 
3410 	va_start(ap, fmt);
3411 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3412 	va_end(ap);
3413 	return ret;
3414 }
3415 EXPORT_SYMBOL_GPL(trace_array_printk);
3416 
3417 /**
3418  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3419  * @tr: The trace array to initialize the buffers for
3420  *
3421  * As trace_array_printk() only writes into instances, they are OK to
3422  * have in the kernel (unlike trace_printk()). This needs to be called
3423  * before trace_array_printk() can be used on a trace_array.
3424  */
3425 int trace_array_init_printk(struct trace_array *tr)
3426 {
3427 	if (!tr)
3428 		return -ENOENT;
3429 
3430 	/* This is only allowed for created instances */
3431 	if (tr == &global_trace)
3432 		return -EINVAL;
3433 
3434 	return alloc_percpu_trace_buffer();
3435 }
3436 EXPORT_SYMBOL_GPL(trace_array_init_printk);
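
/*
 * Usage sketch (instance name assumed; error handling trimmed) for a
 * subsystem that owns its own instance:
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_subsys");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "state=%d\n", state);
 *
 * followed by trace_array_put(tr) when the reference is no longer
 * needed.
 */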
3437 
3438 __printf(3, 4)
3439 int trace_array_printk_buf(struct trace_buffer *buffer,
3440 			   unsigned long ip, const char *fmt, ...)
3441 {
3442 	int ret;
3443 	va_list ap;
3444 
3445 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3446 		return 0;
3447 
3448 	va_start(ap, fmt);
3449 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3450 	va_end(ap);
3451 	return ret;
3452 }
3453 
3454 __printf(2, 0)
3455 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3456 {
3457 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3458 }
3459 EXPORT_SYMBOL_GPL(trace_vprintk);
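
/*
 * Neither trace_vbprintk() nor trace_vprintk() is normally called
 * directly; the trace_printk() macro picks the binary trace_vbprintk()
 * path when the format string is a build-time constant and falls back
 * to trace_vprintk() otherwise, e.g.:
 *
 *	trace_printk("handling irq %d\n", irq);
 */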
3460 
3461 static void trace_iterator_increment(struct trace_iterator *iter)
3462 {
3463 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3464 
3465 	iter->idx++;
3466 	if (buf_iter)
3467 		ring_buffer_iter_advance(buf_iter);
3468 }
3469 
3470 static struct trace_entry *
3471 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3472 		unsigned long *lost_events)
3473 {
3474 	struct ring_buffer_event *event;
3475 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3476 
3477 	if (buf_iter) {
3478 		event = ring_buffer_iter_peek(buf_iter, ts);
3479 		if (lost_events)
3480 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3481 				(unsigned long)-1 : 0;
3482 	} else {
3483 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3484 					 lost_events);
3485 	}
3486 
3487 	if (event) {
3488 		iter->ent_size = ring_buffer_event_length(event);
3489 		return ring_buffer_event_data(event);
3490 	}
3491 	iter->ent_size = 0;
3492 	return NULL;
3493 }
3494 
3495 static struct trace_entry *
3496 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3497 		  unsigned long *missing_events, u64 *ent_ts)
3498 {
3499 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3500 	struct trace_entry *ent, *next = NULL;
3501 	unsigned long lost_events = 0, next_lost = 0;
3502 	int cpu_file = iter->cpu_file;
3503 	u64 next_ts = 0, ts;
3504 	int next_cpu = -1;
3505 	int next_size = 0;
3506 	int cpu;
3507 
3508 	/*
3509 	 * If we are in a per_cpu trace file, don't bother iterating over
3510 	 * all CPUs; peek at that CPU directly.
3511 	 */
3512 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3513 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3514 			return NULL;
3515 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3516 		if (ent_cpu)
3517 			*ent_cpu = cpu_file;
3518 
3519 		return ent;
3520 	}
3521 
3522 	for_each_tracing_cpu(cpu) {
3523 
3524 		if (ring_buffer_empty_cpu(buffer, cpu))
3525 			continue;
3526 
3527 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3528 
3529 		/*
3530 		 * Pick the entry with the smallest timestamp:
3531 		 */
3532 		if (ent && (!next || ts < next_ts)) {
3533 			next = ent;
3534 			next_cpu = cpu;
3535 			next_ts = ts;
3536 			next_lost = lost_events;
3537 			next_size = iter->ent_size;
3538 		}
3539 	}
3540 
3541 	iter->ent_size = next_size;
3542 
3543 	if (ent_cpu)
3544 		*ent_cpu = next_cpu;
3545 
3546 	if (ent_ts)
3547 		*ent_ts = next_ts;
3548 
3549 	if (missing_events)
3550 		*missing_events = next_lost;
3551 
3552 	return next;
3553 }
3554 
3555 #define STATIC_TEMP_BUF_SIZE	128
3556 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3557 
3558 /* Find the next real entry, without updating the iterator itself */
3559 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3560 					  int *ent_cpu, u64 *ent_ts)
3561 {
3562 	/* __find_next_entry will reset ent_size */
3563 	int ent_size = iter->ent_size;
3564 	struct trace_entry *entry;
3565 
3566 	/*
3567 	 * If called from ftrace_dump(), then the iter->temp buffer
3568 	 * will be the static_temp_buf and not created from kmalloc.
3569 	 * If the entry size is greater than the buffer, we cannot
3570 	 * save it. Just return NULL in that case. This is only
3571 	 * used to add markers when two consecutive events' time
3572 	 * stamps have a large delta. See trace_print_lat_context().
3573 	 */
3574 	if (iter->temp == static_temp_buf &&
3575 	    STATIC_TEMP_BUF_SIZE < ent_size)
3576 		return NULL;
3577 
3578 	/*
3579 	 * The __find_next_entry() may call peek_next_entry(), which may
3580 	 * call ring_buffer_peek() that may make the contents of iter->ent
3581 	 * undefined. Need to copy iter->ent now.
3582 	 */
3583 	if (iter->ent && iter->ent != iter->temp) {
3584 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3585 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3586 			void *temp;
3587 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3588 			if (!temp)
3589 				return NULL;
3590 			kfree(iter->temp);
3591 			iter->temp = temp;
3592 			iter->temp_size = iter->ent_size;
3593 		}
3594 		memcpy(iter->temp, iter->ent, iter->ent_size);
3595 		iter->ent = iter->temp;
3596 	}
3597 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3598 	/* Put back the original ent_size */
3599 	iter->ent_size = ent_size;
3600 
3601 	return entry;
3602 }
3603 
3604 /* Find the next real entry, and increment the iterator to the next entry */
3605 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3606 {
3607 	iter->ent = __find_next_entry(iter, &iter->cpu,
3608 				      &iter->lost_events, &iter->ts);
3609 
3610 	if (iter->ent)
3611 		trace_iterator_increment(iter);
3612 
3613 	return iter->ent ? iter : NULL;
3614 }
3615 
3616 static void trace_consume(struct trace_iterator *iter)
3617 {
3618 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3619 			    &iter->lost_events);
3620 }
3621 
3622 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3623 {
3624 	struct trace_iterator *iter = m->private;
3625 	int i = (int)*pos;
3626 	void *ent;
3627 
3628 	WARN_ON_ONCE(iter->leftover);
3629 
3630 	(*pos)++;
3631 
3632 	/* can't go backwards */
3633 	if (iter->idx > i)
3634 		return NULL;
3635 
3636 	if (iter->idx < 0)
3637 		ent = trace_find_next_entry_inc(iter);
3638 	else
3639 		ent = iter;
3640 
3641 	while (ent && iter->idx < i)
3642 		ent = trace_find_next_entry_inc(iter);
3643 
3644 	iter->pos = *pos;
3645 
3646 	return ent;
3647 }
3648 
3649 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3650 {
3651 	struct ring_buffer_iter *buf_iter;
3652 	unsigned long entries = 0;
3653 	u64 ts;
3654 
3655 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3656 
3657 	buf_iter = trace_buffer_iter(iter, cpu);
3658 	if (!buf_iter)
3659 		return;
3660 
3661 	ring_buffer_iter_reset(buf_iter);
3662 
3663 	/*
3664 	 * With the max latency tracers, it is possible that a reset
3665 	 * never took place on a CPU. This is evident when the timestamp
3666 	 * is before the start of the buffer.
3667 	 */
3668 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3669 		if (ts >= iter->array_buffer->time_start)
3670 			break;
3671 		entries++;
3672 		ring_buffer_iter_advance(buf_iter);
3673 	}
3674 
3675 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3676 }
3677 
3678 /*
3679  * The current tracer is copied to avoid taking a global
3680  * lock all around.
3681  */
3682 static void *s_start(struct seq_file *m, loff_t *pos)
3683 {
3684 	struct trace_iterator *iter = m->private;
3685 	struct trace_array *tr = iter->tr;
3686 	int cpu_file = iter->cpu_file;
3687 	void *p = NULL;
3688 	loff_t l = 0;
3689 	int cpu;
3690 
3691 	/*
3692 	 * Copy the tracer to avoid using a global lock all around.
3693 	 * iter->trace is a copy of current_trace; the name pointer may
3694 	 * be compared instead of a strcmp(), as iter->trace->name
3695 	 * will point to the same string as current_trace->name.
3696 	 */
3697 	mutex_lock(&trace_types_lock);
3698 	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3699 		*iter->trace = *tr->current_trace;
3700 	mutex_unlock(&trace_types_lock);
3701 
3702 #ifdef CONFIG_TRACER_MAX_TRACE
3703 	if (iter->snapshot && iter->trace->use_max_tr)
3704 		return ERR_PTR(-EBUSY);
3705 #endif
3706 
3707 	if (*pos != iter->pos) {
3708 		iter->ent = NULL;
3709 		iter->cpu = 0;
3710 		iter->idx = -1;
3711 
3712 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3713 			for_each_tracing_cpu(cpu)
3714 				tracing_iter_reset(iter, cpu);
3715 		} else
3716 			tracing_iter_reset(iter, cpu_file);
3717 
3718 		iter->leftover = 0;
3719 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3720 			;
3721 
3722 	} else {
3723 		/*
3724 		 * If we overflowed the seq_file before, then we want
3725 		 * to just reuse the trace_seq buffer again.
3726 		 */
3727 		if (iter->leftover)
3728 			p = iter;
3729 		else {
3730 			l = *pos - 1;
3731 			p = s_next(m, p, &l);
3732 		}
3733 	}
3734 
3735 	trace_event_read_lock();
3736 	trace_access_lock(cpu_file);
3737 	return p;
3738 }
3739 
3740 static void s_stop(struct seq_file *m, void *p)
3741 {
3742 	struct trace_iterator *iter = m->private;
3743 
3744 #ifdef CONFIG_TRACER_MAX_TRACE
3745 	if (iter->snapshot && iter->trace->use_max_tr)
3746 		return;
3747 #endif
3748 
3749 	trace_access_unlock(iter->cpu_file);
3750 	trace_event_read_unlock();
3751 }
3752 
3753 static void
3754 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3755 		      unsigned long *entries, int cpu)
3756 {
3757 	unsigned long count;
3758 
3759 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3760 	/*
3761 	 * If this buffer has skipped entries, then it still holds all
3762 	 * the entries for the trace, and we only need to ignore the
3763 	 * ones before the start time stamp.
3764 	 */
3765 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3766 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3767 		/* total is the same as the entries */
3768 		*total = count;
3769 	} else
3770 		*total = count +
3771 			ring_buffer_overrun_cpu(buf->buffer, cpu);
3772 	*entries = count;
3773 }
3774 
3775 static void
3776 get_total_entries(struct array_buffer *buf,
3777 		  unsigned long *total, unsigned long *entries)
3778 {
3779 	unsigned long t, e;
3780 	int cpu;
3781 
3782 	*total = 0;
3783 	*entries = 0;
3784 
3785 	for_each_tracing_cpu(cpu) {
3786 		get_total_entries_cpu(buf, &t, &e, cpu);
3787 		*total += t;
3788 		*entries += e;
3789 	}
3790 }
3791 
3792 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3793 {
3794 	unsigned long total, entries;
3795 
3796 	if (!tr)
3797 		tr = &global_trace;
3798 
3799 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
3800 
3801 	return entries;
3802 }
3803 
3804 unsigned long trace_total_entries(struct trace_array *tr)
3805 {
3806 	unsigned long total, entries;
3807 
3808 	if (!tr)
3809 		tr = &global_trace;
3810 
3811 	get_total_entries(&tr->array_buffer, &total, &entries);
3812 
3813 	return entries;
3814 }
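
/*
 * Illustrative only: both helpers above fall back to the top level
 * (global) trace array when @tr is NULL, so a caller may simply do:
 *
 *	pr_info("trace buffer holds %lu entries\n", trace_total_entries(NULL));
 */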
3815 
3816 static void print_lat_help_header(struct seq_file *m)
3817 {
3818 	seq_puts(m, "#                    _------=> CPU#            \n"
3819 		    "#                   / _-----=> irqs-off        \n"
3820 		    "#                  | / _----=> need-resched    \n"
3821 		    "#                  || / _---=> hardirq/softirq \n"
3822 		    "#                  ||| / _--=> preempt-depth   \n"
3823 		    "#                  |||| /     delay            \n"
3824 		    "#  cmd     pid     ||||| time  |   caller      \n"
3825 		    "#     \\   /        |||||  \\    |   /         \n");
3826 }
3827 
3828 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
3829 {
3830 	unsigned long total;
3831 	unsigned long entries;
3832 
3833 	get_total_entries(buf, &total, &entries);
3834 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3835 		   entries, total, num_online_cpus());
3836 	seq_puts(m, "#\n");
3837 }
3838 
3839 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
3840 				   unsigned int flags)
3841 {
3842 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3843 
3844 	print_event_info(buf, m);
3845 
3846 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
3847 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
3848 }
3849 
3850 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
3851 				       unsigned int flags)
3852 {
3853 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
3854 	const char *space = "            ";
3855 	int prec = tgid ? 12 : 2;
3856 
3857 	print_event_info(buf, m);
3858 
3859 	seq_printf(m, "#                            %.*s  _-----=> irqs-off\n", prec, space);
3860 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
3861 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
3862 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
3863 	seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
3864 	seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
3865 	seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
3866 }
3867 
3868 void
3869 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3870 {
3871 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3872 	struct array_buffer *buf = iter->array_buffer;
3873 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3874 	struct tracer *type = iter->trace;
3875 	unsigned long entries;
3876 	unsigned long total;
3877 	const char *name;
3878 
3879 	name = type->name;
3880 
3881 	get_total_entries(buf, &total, &entries);
3882 
3883 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3884 		   name, UTS_RELEASE);
3885 	seq_puts(m, "# -----------------------------------"
3886 		 "---------------------------------\n");
3887 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3888 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3889 		   nsecs_to_usecs(data->saved_latency),
3890 		   entries,
3891 		   total,
3892 		   buf->cpu,
3893 #if defined(CONFIG_PREEMPT_NONE)
3894 		   "server",
3895 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3896 		   "desktop",
3897 #elif defined(CONFIG_PREEMPT)
3898 		   "preempt",
3899 #elif defined(CONFIG_PREEMPT_RT)
3900 		   "preempt_rt",
3901 #else
3902 		   "unknown",
3903 #endif
3904 		   /* These are reserved for later use */
3905 		   0, 0, 0, 0);
3906 #ifdef CONFIG_SMP
3907 	seq_printf(m, " #P:%d)\n", num_online_cpus());
3908 #else
3909 	seq_puts(m, ")\n");
3910 #endif
3911 	seq_puts(m, "#    -----------------\n");
3912 	seq_printf(m, "#    | task: %.16s-%d "
3913 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3914 		   data->comm, data->pid,
3915 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3916 		   data->policy, data->rt_priority);
3917 	seq_puts(m, "#    -----------------\n");
3918 
3919 	if (data->critical_start) {
3920 		seq_puts(m, "#  => started at: ");
3921 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3922 		trace_print_seq(m, &iter->seq);
3923 		seq_puts(m, "\n#  => ended at:   ");
3924 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3925 		trace_print_seq(m, &iter->seq);
3926 		seq_puts(m, "\n#\n");
3927 	}
3928 
3929 	seq_puts(m, "#\n");
3930 }
3931 
3932 static void test_cpu_buff_start(struct trace_iterator *iter)
3933 {
3934 	struct trace_seq *s = &iter->seq;
3935 	struct trace_array *tr = iter->tr;
3936 
3937 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3938 		return;
3939 
3940 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3941 		return;
3942 
3943 	if (cpumask_available(iter->started) &&
3944 	    cpumask_test_cpu(iter->cpu, iter->started))
3945 		return;
3946 
3947 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
3948 		return;
3949 
3950 	if (cpumask_available(iter->started))
3951 		cpumask_set_cpu(iter->cpu, iter->started);
3952 
3953 	/* Don't print the "CPU buffer started" annotation for the first entry of the trace */
3954 	if (iter->idx > 1)
3955 		trace_seq_printf(s, "##### CPU %u buffer started #####\n",
3956 				iter->cpu);
3957 }
3958 
3959 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3960 {
3961 	struct trace_array *tr = iter->tr;
3962 	struct trace_seq *s = &iter->seq;
3963 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3964 	struct trace_entry *entry;
3965 	struct trace_event *event;
3966 
3967 	entry = iter->ent;
3968 
3969 	test_cpu_buff_start(iter);
3970 
3971 	event = ftrace_find_event(entry->type);
3972 
3973 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3974 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3975 			trace_print_lat_context(iter);
3976 		else
3977 			trace_print_context(iter);
3978 	}
3979 
3980 	if (trace_seq_has_overflowed(s))
3981 		return TRACE_TYPE_PARTIAL_LINE;
3982 
3983 	if (event)
3984 		return event->funcs->trace(iter, sym_flags, event);
3985 
3986 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
3987 
3988 	return trace_handle_return(s);
3989 }
3990 
3991 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3992 {
3993 	struct trace_array *tr = iter->tr;
3994 	struct trace_seq *s = &iter->seq;
3995 	struct trace_entry *entry;
3996 	struct trace_event *event;
3997 
3998 	entry = iter->ent;
3999 
4000 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4001 		trace_seq_printf(s, "%d %d %llu ",
4002 				 entry->pid, iter->cpu, iter->ts);
4003 
4004 	if (trace_seq_has_overflowed(s))
4005 		return TRACE_TYPE_PARTIAL_LINE;
4006 
4007 	event = ftrace_find_event(entry->type);
4008 	if (event)
4009 		return event->funcs->raw(iter, 0, event);
4010 
4011 	trace_seq_printf(s, "%d ?\n", entry->type);
4012 
4013 	return trace_handle_return(s);
4014 }
4015 
4016 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4017 {
4018 	struct trace_array *tr = iter->tr;
4019 	struct trace_seq *s = &iter->seq;
4020 	unsigned char newline = '\n';
4021 	struct trace_entry *entry;
4022 	struct trace_event *event;
4023 
4024 	entry = iter->ent;
4025 
4026 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4027 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4028 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4029 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4030 		if (trace_seq_has_overflowed(s))
4031 			return TRACE_TYPE_PARTIAL_LINE;
4032 	}
4033 
4034 	event = ftrace_find_event(entry->type);
4035 	if (event) {
4036 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4037 		if (ret != TRACE_TYPE_HANDLED)
4038 			return ret;
4039 	}
4040 
4041 	SEQ_PUT_FIELD(s, newline);
4042 
4043 	return trace_handle_return(s);
4044 }
4045 
4046 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4047 {
4048 	struct trace_array *tr = iter->tr;
4049 	struct trace_seq *s = &iter->seq;
4050 	struct trace_entry *entry;
4051 	struct trace_event *event;
4052 
4053 	entry = iter->ent;
4054 
4055 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4056 		SEQ_PUT_FIELD(s, entry->pid);
4057 		SEQ_PUT_FIELD(s, iter->cpu);
4058 		SEQ_PUT_FIELD(s, iter->ts);
4059 		if (trace_seq_has_overflowed(s))
4060 			return TRACE_TYPE_PARTIAL_LINE;
4061 	}
4062 
4063 	event = ftrace_find_event(entry->type);
4064 	return event ? event->funcs->binary(iter, 0, event) :
4065 		TRACE_TYPE_HANDLED;
4066 }
4067 
4068 int trace_empty(struct trace_iterator *iter)
4069 {
4070 	struct ring_buffer_iter *buf_iter;
4071 	int cpu;
4072 
4073 	/* If we are looking at one CPU buffer, only check that one */
4074 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4075 		cpu = iter->cpu_file;
4076 		buf_iter = trace_buffer_iter(iter, cpu);
4077 		if (buf_iter) {
4078 			if (!ring_buffer_iter_empty(buf_iter))
4079 				return 0;
4080 		} else {
4081 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4082 				return 0;
4083 		}
4084 		return 1;
4085 	}
4086 
4087 	for_each_tracing_cpu(cpu) {
4088 		buf_iter = trace_buffer_iter(iter, cpu);
4089 		if (buf_iter) {
4090 			if (!ring_buffer_iter_empty(buf_iter))
4091 				return 0;
4092 		} else {
4093 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4094 				return 0;
4095 		}
4096 	}
4097 
4098 	return 1;
4099 }
4100 
4101 /*  Called with trace_event_read_lock() held. */
4102 enum print_line_t print_trace_line(struct trace_iterator *iter)
4103 {
4104 	struct trace_array *tr = iter->tr;
4105 	unsigned long trace_flags = tr->trace_flags;
4106 	enum print_line_t ret;
4107 
4108 	if (iter->lost_events) {
4109 		if (iter->lost_events == (unsigned long)-1)
4110 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4111 					 iter->cpu);
4112 		else
4113 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4114 					 iter->cpu, iter->lost_events);
4115 		if (trace_seq_has_overflowed(&iter->seq))
4116 			return TRACE_TYPE_PARTIAL_LINE;
4117 	}
4118 
4119 	if (iter->trace && iter->trace->print_line) {
4120 		ret = iter->trace->print_line(iter);
4121 		if (ret != TRACE_TYPE_UNHANDLED)
4122 			return ret;
4123 	}
4124 
4125 	if (iter->ent->type == TRACE_BPUTS &&
4126 			trace_flags & TRACE_ITER_PRINTK &&
4127 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4128 		return trace_print_bputs_msg_only(iter);
4129 
4130 	if (iter->ent->type == TRACE_BPRINT &&
4131 			trace_flags & TRACE_ITER_PRINTK &&
4132 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4133 		return trace_print_bprintk_msg_only(iter);
4134 
4135 	if (iter->ent->type == TRACE_PRINT &&
4136 			trace_flags & TRACE_ITER_PRINTK &&
4137 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4138 		return trace_print_printk_msg_only(iter);
4139 
4140 	if (trace_flags & TRACE_ITER_BIN)
4141 		return print_bin_fmt(iter);
4142 
4143 	if (trace_flags & TRACE_ITER_HEX)
4144 		return print_hex_fmt(iter);
4145 
4146 	if (trace_flags & TRACE_ITER_RAW)
4147 		return print_raw_fmt(iter);
4148 
4149 	return print_trace_fmt(iter);
4150 }
4151 
4152 void trace_latency_header(struct seq_file *m)
4153 {
4154 	struct trace_iterator *iter = m->private;
4155 	struct trace_array *tr = iter->tr;
4156 
4157 	/* print nothing if the buffers are empty */
4158 	if (trace_empty(iter))
4159 		return;
4160 
4161 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4162 		print_trace_header(m, iter);
4163 
4164 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4165 		print_lat_help_header(m);
4166 }
4167 
4168 void trace_default_header(struct seq_file *m)
4169 {
4170 	struct trace_iterator *iter = m->private;
4171 	struct trace_array *tr = iter->tr;
4172 	unsigned long trace_flags = tr->trace_flags;
4173 
4174 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4175 		return;
4176 
4177 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4178 		/* print nothing if the buffers are empty */
4179 		if (trace_empty(iter))
4180 			return;
4181 		print_trace_header(m, iter);
4182 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4183 			print_lat_help_header(m);
4184 	} else {
4185 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4186 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4187 				print_func_help_header_irq(iter->array_buffer,
4188 							   m, trace_flags);
4189 			else
4190 				print_func_help_header(iter->array_buffer, m,
4191 						       trace_flags);
4192 		}
4193 	}
4194 }
4195 
4196 static void test_ftrace_alive(struct seq_file *m)
4197 {
4198 	if (!ftrace_is_dead())
4199 		return;
4200 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4201 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4202 }
4203 
4204 #ifdef CONFIG_TRACER_MAX_TRACE
4205 static void show_snapshot_main_help(struct seq_file *m)
4206 {
4207 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4208 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4209 		    "#                      Takes a snapshot of the main buffer.\n"
4210 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4211 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4212 		    "#                       is not a '0' or '1')\n");
4213 }
4214 
4215 static void show_snapshot_percpu_help(struct seq_file *m)
4216 {
4217 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4218 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4219 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4220 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4221 #else
4222 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4223 		    "#                     Must use main snapshot file to allocate.\n");
4224 #endif
4225 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4226 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4227 		    "#                       is not a '0' or '1')\n");
4228 }
4229 
4230 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4231 {
4232 	if (iter->tr->allocated_snapshot)
4233 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4234 	else
4235 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4236 
4237 	seq_puts(m, "# Snapshot commands:\n");
4238 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4239 		show_snapshot_main_help(m);
4240 	else
4241 		show_snapshot_percpu_help(m);
4242 }
4243 #else
4244 /* Should never be called */
4245 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4246 #endif
4247 
4248 static int s_show(struct seq_file *m, void *v)
4249 {
4250 	struct trace_iterator *iter = v;
4251 	int ret;
4252 
4253 	if (iter->ent == NULL) {
4254 		if (iter->tr) {
4255 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4256 			seq_puts(m, "#\n");
4257 			test_ftrace_alive(m);
4258 		}
4259 		if (iter->snapshot && trace_empty(iter))
4260 			print_snapshot_help(m, iter);
4261 		else if (iter->trace && iter->trace->print_header)
4262 			iter->trace->print_header(m);
4263 		else
4264 			trace_default_header(m);
4265 
4266 	} else if (iter->leftover) {
4267 		/*
4268 		 * If we filled the seq_file buffer earlier, we
4269 		 * want to just show it now.
4270 		 */
4271 		ret = trace_print_seq(m, &iter->seq);
4272 
4273 		/* ret should this time be zero, but you never know */
4274 		iter->leftover = ret;
4275 
4276 	} else {
4277 		print_trace_line(iter);
4278 		ret = trace_print_seq(m, &iter->seq);
4279 		/*
4280 		 * If we overflow the seq_file buffer, then it will
4281 		 * ask us for this data again at start up.
4282 		 * Use that instead.
4283 		 *  ret is 0 if seq_file write succeeded.
4284 		 *        -1 otherwise.
4285 		 */
4286 		iter->leftover = ret;
4287 	}
4288 
4289 	return 0;
4290 }
4291 
4292 /*
4293  * Should be used after trace_array_get(); trace_types_lock
4294  * ensures that i_cdev has already been initialized.
4295  */
4296 static inline int tracing_get_cpu(struct inode *inode)
4297 {
4298 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4299 		return (long)inode->i_cdev - 1;
4300 	return RING_BUFFER_ALL_CPUS;
4301 }
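
/*
 * Sketch of the encoding assumed above: when a per-CPU file is created
 * (see trace_create_cpu_file()), i_cdev is expected to have been set to
 * (cpu + 1), so that a NULL i_cdev can stand for "all CPUs":
 *
 *	inode->i_cdev = (void *)(long)(cpu + 1);    <- at file creation
 *	cpu = (long)inode->i_cdev - 1;              <- here, on open/read
 */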
4302 
4303 static const struct seq_operations tracer_seq_ops = {
4304 	.start		= s_start,
4305 	.next		= s_next,
4306 	.stop		= s_stop,
4307 	.show		= s_show,
4308 };
4309 
4310 static struct trace_iterator *
4311 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4312 {
4313 	struct trace_array *tr = inode->i_private;
4314 	struct trace_iterator *iter;
4315 	int cpu;
4316 
4317 	if (tracing_disabled)
4318 		return ERR_PTR(-ENODEV);
4319 
4320 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4321 	if (!iter)
4322 		return ERR_PTR(-ENOMEM);
4323 
4324 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4325 				    GFP_KERNEL);
4326 	if (!iter->buffer_iter)
4327 		goto release;
4328 
4329 	/*
4330 	 * trace_find_next_entry() may need to save off iter->ent.
4331 	 * It will place it into the iter->temp buffer. As most
4332 	 * events are less than 128 bytes, allocate a buffer of that size.
4333 	 * If one is greater, then trace_find_next_entry() will
4334 	 * allocate a new buffer to adjust for the bigger iter->ent.
4335 	 * It's not critical if it fails to get allocated here.
4336 	 */
4337 	iter->temp = kmalloc(128, GFP_KERNEL);
4338 	if (iter->temp)
4339 		iter->temp_size = 128;
4340 
4341 	/*
4342 	 * We make a copy of the current tracer to avoid concurrent
4343 	 * changes on it while we are reading.
4344 	 */
4345 	mutex_lock(&trace_types_lock);
4346 	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4347 	if (!iter->trace)
4348 		goto fail;
4349 
4350 	*iter->trace = *tr->current_trace;
4351 
4352 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4353 		goto fail;
4354 
4355 	iter->tr = tr;
4356 
4357 #ifdef CONFIG_TRACER_MAX_TRACE
4358 	/* Currently only the top directory has a snapshot */
4359 	if (tr->current_trace->print_max || snapshot)
4360 		iter->array_buffer = &tr->max_buffer;
4361 	else
4362 #endif
4363 		iter->array_buffer = &tr->array_buffer;
4364 	iter->snapshot = snapshot;
4365 	iter->pos = -1;
4366 	iter->cpu_file = tracing_get_cpu(inode);
4367 	mutex_init(&iter->mutex);
4368 
4369 	/* Notify the tracer early; before we stop tracing. */
4370 	if (iter->trace->open)
4371 		iter->trace->open(iter);
4372 
4373 	/* Annotate start of buffers if we had overruns */
4374 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4375 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4376 
4377 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4378 	if (trace_clocks[tr->clock_id].in_ns)
4379 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4380 
4381 	/*
4382 	 * If pause-on-trace is enabled, then stop the trace while
4383 	 * dumping, unless this is the "snapshot" file
4384 	 */
4385 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4386 		tracing_stop_tr(tr);
4387 
4388 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4389 		for_each_tracing_cpu(cpu) {
4390 			iter->buffer_iter[cpu] =
4391 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4392 							 cpu, GFP_KERNEL);
4393 		}
4394 		ring_buffer_read_prepare_sync();
4395 		for_each_tracing_cpu(cpu) {
4396 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4397 			tracing_iter_reset(iter, cpu);
4398 		}
4399 	} else {
4400 		cpu = iter->cpu_file;
4401 		iter->buffer_iter[cpu] =
4402 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4403 						 cpu, GFP_KERNEL);
4404 		ring_buffer_read_prepare_sync();
4405 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4406 		tracing_iter_reset(iter, cpu);
4407 	}
4408 
4409 	mutex_unlock(&trace_types_lock);
4410 
4411 	return iter;
4412 
4413  fail:
4414 	mutex_unlock(&trace_types_lock);
4415 	kfree(iter->trace);
4416 	kfree(iter->temp);
4417 	kfree(iter->buffer_iter);
4418 release:
4419 	seq_release_private(inode, file);
4420 	return ERR_PTR(-ENOMEM);
4421 }
4422 
4423 int tracing_open_generic(struct inode *inode, struct file *filp)
4424 {
4425 	int ret;
4426 
4427 	ret = tracing_check_open_get_tr(NULL);
4428 	if (ret)
4429 		return ret;
4430 
4431 	filp->private_data = inode->i_private;
4432 	return 0;
4433 }
4434 
4435 bool tracing_is_disabled(void)
4436 {
4437 	return tracing_disabled ? true : false;
4438 }
4439 
4440 /*
4441  * Open and update trace_array ref count.
4442  * Must have the current trace_array passed to it.
4443  */
4444 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4445 {
4446 	struct trace_array *tr = inode->i_private;
4447 	int ret;
4448 
4449 	ret = tracing_check_open_get_tr(tr);
4450 	if (ret)
4451 		return ret;
4452 
4453 	filp->private_data = inode->i_private;
4454 
4455 	return 0;
4456 }
4457 
4458 static int tracing_release(struct inode *inode, struct file *file)
4459 {
4460 	struct trace_array *tr = inode->i_private;
4461 	struct seq_file *m = file->private_data;
4462 	struct trace_iterator *iter;
4463 	int cpu;
4464 
4465 	if (!(file->f_mode & FMODE_READ)) {
4466 		trace_array_put(tr);
4467 		return 0;
4468 	}
4469 
4470 	/* Writes do not use seq_file */
4471 	iter = m->private;
4472 	mutex_lock(&trace_types_lock);
4473 
4474 	for_each_tracing_cpu(cpu) {
4475 		if (iter->buffer_iter[cpu])
4476 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4477 	}
4478 
4479 	if (iter->trace && iter->trace->close)
4480 		iter->trace->close(iter);
4481 
4482 	if (!iter->snapshot && tr->stop_count)
4483 		/* reenable tracing if it was previously enabled */
4484 		tracing_start_tr(tr);
4485 
4486 	__trace_array_put(tr);
4487 
4488 	mutex_unlock(&trace_types_lock);
4489 
4490 	mutex_destroy(&iter->mutex);
4491 	free_cpumask_var(iter->started);
4492 	kfree(iter->temp);
4493 	kfree(iter->trace);
4494 	kfree(iter->buffer_iter);
4495 	seq_release_private(inode, file);
4496 
4497 	return 0;
4498 }
4499 
4500 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4501 {
4502 	struct trace_array *tr = inode->i_private;
4503 
4504 	trace_array_put(tr);
4505 	return 0;
4506 }
4507 
4508 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4509 {
4510 	struct trace_array *tr = inode->i_private;
4511 
4512 	trace_array_put(tr);
4513 
4514 	return single_release(inode, file);
4515 }
4516 
4517 static int tracing_open(struct inode *inode, struct file *file)
4518 {
4519 	struct trace_array *tr = inode->i_private;
4520 	struct trace_iterator *iter;
4521 	int ret;
4522 
4523 	ret = tracing_check_open_get_tr(tr);
4524 	if (ret)
4525 		return ret;
4526 
4527 	/* If this file was open for write, then erase contents */
4528 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4529 		int cpu = tracing_get_cpu(inode);
4530 		struct array_buffer *trace_buf = &tr->array_buffer;
4531 
4532 #ifdef CONFIG_TRACER_MAX_TRACE
4533 		if (tr->current_trace->print_max)
4534 			trace_buf = &tr->max_buffer;
4535 #endif
4536 
4537 		if (cpu == RING_BUFFER_ALL_CPUS)
4538 			tracing_reset_online_cpus(trace_buf);
4539 		else
4540 			tracing_reset_cpu(trace_buf, cpu);
4541 	}
4542 
4543 	if (file->f_mode & FMODE_READ) {
4544 		iter = __tracing_open(inode, file, false);
4545 		if (IS_ERR(iter))
4546 			ret = PTR_ERR(iter);
4547 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4548 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4549 	}
4550 
4551 	if (ret < 0)
4552 		trace_array_put(tr);
4553 
4554 	return ret;
4555 }
4556 
4557 /*
4558  * Some tracers are not suitable for instance buffers.
4559  * A tracer is always available for the global array (toplevel)
4560  * or if it explicitly states that it is.
4561  */
4562 static bool
4563 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4564 {
4565 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4566 }
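
/*
 * Illustrative only: a tracer opts in to instance buffers by setting
 * ->allow_instances in its struct tracer before registering it, e.g.:
 *
 *	static struct tracer example_tracer = {
 *		.name			= "example",
 *		.allow_instances	= true,
 *	};
 */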
4567 
4568 /* Find the next tracer that this trace array may use */
4569 static struct tracer *
4570 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4571 {
4572 	while (t && !trace_ok_for_array(t, tr))
4573 		t = t->next;
4574 
4575 	return t;
4576 }
4577 
4578 static void *
4579 t_next(struct seq_file *m, void *v, loff_t *pos)
4580 {
4581 	struct trace_array *tr = m->private;
4582 	struct tracer *t = v;
4583 
4584 	(*pos)++;
4585 
4586 	if (t)
4587 		t = get_tracer_for_array(tr, t->next);
4588 
4589 	return t;
4590 }
4591 
4592 static void *t_start(struct seq_file *m, loff_t *pos)
4593 {
4594 	struct trace_array *tr = m->private;
4595 	struct tracer *t;
4596 	loff_t l = 0;
4597 
4598 	mutex_lock(&trace_types_lock);
4599 
4600 	t = get_tracer_for_array(tr, trace_types);
4601 	for (; t && l < *pos; t = t_next(m, t, &l))
4602 		;
4603 
4604 	return t;
4605 }
4606 
4607 static void t_stop(struct seq_file *m, void *p)
4608 {
4609 	mutex_unlock(&trace_types_lock);
4610 }
4611 
4612 static int t_show(struct seq_file *m, void *v)
4613 {
4614 	struct tracer *t = v;
4615 
4616 	if (!t)
4617 		return 0;
4618 
4619 	seq_puts(m, t->name);
4620 	if (t->next)
4621 		seq_putc(m, ' ');
4622 	else
4623 		seq_putc(m, '\n');
4624 
4625 	return 0;
4626 }
4627 
4628 static const struct seq_operations show_traces_seq_ops = {
4629 	.start		= t_start,
4630 	.next		= t_next,
4631 	.stop		= t_stop,
4632 	.show		= t_show,
4633 };
4634 
4635 static int show_traces_open(struct inode *inode, struct file *file)
4636 {
4637 	struct trace_array *tr = inode->i_private;
4638 	struct seq_file *m;
4639 	int ret;
4640 
4641 	ret = tracing_check_open_get_tr(tr);
4642 	if (ret)
4643 		return ret;
4644 
4645 	ret = seq_open(file, &show_traces_seq_ops);
4646 	if (ret) {
4647 		trace_array_put(tr);
4648 		return ret;
4649 	}
4650 
4651 	m = file->private_data;
4652 	m->private = tr;
4653 
4654 	return 0;
4655 }
4656 
4657 static int show_traces_release(struct inode *inode, struct file *file)
4658 {
4659 	struct trace_array *tr = inode->i_private;
4660 
4661 	trace_array_put(tr);
4662 	return seq_release(inode, file);
4663 }
4664 
4665 static ssize_t
4666 tracing_write_stub(struct file *filp, const char __user *ubuf,
4667 		   size_t count, loff_t *ppos)
4668 {
4669 	return count;
4670 }
4671 
4672 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4673 {
4674 	int ret;
4675 
4676 	if (file->f_mode & FMODE_READ)
4677 		ret = seq_lseek(file, offset, whence);
4678 	else
4679 		file->f_pos = ret = 0;
4680 
4681 	return ret;
4682 }
4683 
4684 static const struct file_operations tracing_fops = {
4685 	.open		= tracing_open,
4686 	.read		= seq_read,
4687 	.write		= tracing_write_stub,
4688 	.llseek		= tracing_lseek,
4689 	.release	= tracing_release,
4690 };
4691 
4692 static const struct file_operations show_traces_fops = {
4693 	.open		= show_traces_open,
4694 	.read		= seq_read,
4695 	.llseek		= seq_lseek,
4696 	.release	= show_traces_release,
4697 };
4698 
4699 static ssize_t
4700 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4701 		     size_t count, loff_t *ppos)
4702 {
4703 	struct trace_array *tr = file_inode(filp)->i_private;
4704 	char *mask_str;
4705 	int len;
4706 
4707 	len = snprintf(NULL, 0, "%*pb\n",
4708 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
4709 	mask_str = kmalloc(len, GFP_KERNEL);
4710 	if (!mask_str)
4711 		return -ENOMEM;
4712 
4713 	len = snprintf(mask_str, len, "%*pb\n",
4714 		       cpumask_pr_args(tr->tracing_cpumask));
4715 	if (len >= count) {
4716 		count = -EINVAL;
4717 		goto out_err;
4718 	}
4719 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4720 
4721 out_err:
4722 	kfree(mask_str);
4723 
4724 	return count;
4725 }
4726 
4727 int tracing_set_cpumask(struct trace_array *tr,
4728 			cpumask_var_t tracing_cpumask_new)
4729 {
4730 	int cpu;
4731 
4732 	if (!tr)
4733 		return -EINVAL;
4734 
4735 	local_irq_disable();
4736 	arch_spin_lock(&tr->max_lock);
4737 	for_each_tracing_cpu(cpu) {
4738 		/*
4739 		 * Increase/decrease the disabled counter if we are
4740 		 * about to flip a bit in the cpumask:
4741 		 */
4742 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4743 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4744 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4745 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
4746 		}
4747 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4748 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4749 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4750 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
4751 		}
4752 	}
4753 	arch_spin_unlock(&tr->max_lock);
4754 	local_irq_enable();
4755 
4756 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4757 
4758 	return 0;
4759 }
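
/*
 * Minimal usage sketch (hypothetical caller), mirroring what
 * tracing_cpumask_write() below does with a user supplied mask. Note
 * cpumask_parse() takes the hex bitmap format, so "3" means CPUs 0-1:
 *
 *	cpumask_var_t new_mask;
 *
 *	if (alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
 *		cpumask_parse("3", new_mask);
 *		tracing_set_cpumask(tr, new_mask);
 *		free_cpumask_var(new_mask);
 *	}
 */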
4760 
4761 static ssize_t
4762 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4763 		      size_t count, loff_t *ppos)
4764 {
4765 	struct trace_array *tr = file_inode(filp)->i_private;
4766 	cpumask_var_t tracing_cpumask_new;
4767 	int err;
4768 
4769 	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4770 		return -ENOMEM;
4771 
4772 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4773 	if (err)
4774 		goto err_free;
4775 
4776 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
4777 	if (err)
4778 		goto err_free;
4779 
4780 	free_cpumask_var(tracing_cpumask_new);
4781 
4782 	return count;
4783 
4784 err_free:
4785 	free_cpumask_var(tracing_cpumask_new);
4786 
4787 	return err;
4788 }
4789 
4790 static const struct file_operations tracing_cpumask_fops = {
4791 	.open		= tracing_open_generic_tr,
4792 	.read		= tracing_cpumask_read,
4793 	.write		= tracing_cpumask_write,
4794 	.release	= tracing_release_generic_tr,
4795 	.llseek		= generic_file_llseek,
4796 };
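
/*
 * These ops back the "tracing_cpumask" tracefs file. Illustrative use
 * from user space (assuming tracefs is mounted at /sys/kernel/tracing;
 * the mask uses the usual hex cpumask format):
 *
 *	# cat /sys/kernel/tracing/tracing_cpumask
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask
 */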
4797 
4798 static int tracing_trace_options_show(struct seq_file *m, void *v)
4799 {
4800 	struct tracer_opt *trace_opts;
4801 	struct trace_array *tr = m->private;
4802 	u32 tracer_flags;
4803 	int i;
4804 
4805 	mutex_lock(&trace_types_lock);
4806 	tracer_flags = tr->current_trace->flags->val;
4807 	trace_opts = tr->current_trace->flags->opts;
4808 
4809 	for (i = 0; trace_options[i]; i++) {
4810 		if (tr->trace_flags & (1 << i))
4811 			seq_printf(m, "%s\n", trace_options[i]);
4812 		else
4813 			seq_printf(m, "no%s\n", trace_options[i]);
4814 	}
4815 
4816 	for (i = 0; trace_opts[i].name; i++) {
4817 		if (tracer_flags & trace_opts[i].bit)
4818 			seq_printf(m, "%s\n", trace_opts[i].name);
4819 		else
4820 			seq_printf(m, "no%s\n", trace_opts[i].name);
4821 	}
4822 	mutex_unlock(&trace_types_lock);
4823 
4824 	return 0;
4825 }
4826 
4827 static int __set_tracer_option(struct trace_array *tr,
4828 			       struct tracer_flags *tracer_flags,
4829 			       struct tracer_opt *opts, int neg)
4830 {
4831 	struct tracer *trace = tracer_flags->trace;
4832 	int ret;
4833 
4834 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4835 	if (ret)
4836 		return ret;
4837 
4838 	if (neg)
4839 		tracer_flags->val &= ~opts->bit;
4840 	else
4841 		tracer_flags->val |= opts->bit;
4842 	return 0;
4843 }
4844 
4845 /* Try to assign a tracer specific option */
4846 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4847 {
4848 	struct tracer *trace = tr->current_trace;
4849 	struct tracer_flags *tracer_flags = trace->flags;
4850 	struct tracer_opt *opts = NULL;
4851 	int i;
4852 
4853 	for (i = 0; tracer_flags->opts[i].name; i++) {
4854 		opts = &tracer_flags->opts[i];
4855 
4856 		if (strcmp(cmp, opts->name) == 0)
4857 			return __set_tracer_option(tr, trace->flags, opts, neg);
4858 	}
4859 
4860 	return -EINVAL;
4861 }
4862 
4863 /* Some tracers require overwrite to stay enabled */
4864 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4865 {
4866 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4867 		return -1;
4868 
4869 	return 0;
4870 }
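
/*
 * Illustrative only: a latency tracer typically calls this from its
 * ->flag_changed callback so that the overwrite flag cannot be cleared
 * while the tracer is enabled, e.g.:
 *
 *	static int example_flag_changed(struct trace_array *tr, u32 mask, int set)
 *	{
 *		return trace_keep_overwrite(tr->current_trace, mask, set);
 *	}
 */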
4871 
4872 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4873 {
4874 	int *map;
4875 
4876 	if ((mask == TRACE_ITER_RECORD_TGID) ||
4877 	    (mask == TRACE_ITER_RECORD_CMD))
4878 		lockdep_assert_held(&event_mutex);
4879 
4880 	/* do nothing if flag is already set */
4881 	if (!!(tr->trace_flags & mask) == !!enabled)
4882 		return 0;
4883 
4884 	/* Give the tracer a chance to approve the change */
4885 	if (tr->current_trace->flag_changed)
4886 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4887 			return -EINVAL;
4888 
4889 	if (enabled)
4890 		tr->trace_flags |= mask;
4891 	else
4892 		tr->trace_flags &= ~mask;
4893 
4894 	if (mask == TRACE_ITER_RECORD_CMD)
4895 		trace_event_enable_cmd_record(enabled);
4896 
4897 	if (mask == TRACE_ITER_RECORD_TGID) {
4898 		if (!tgid_map) {
4899 			tgid_map_max = pid_max;
4900 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
4901 				       GFP_KERNEL);
4902 
4903 			/*
4904 			 * Pairs with smp_load_acquire() in
4905 			 * trace_find_tgid_ptr() to ensure that if it observes
4906 			 * the tgid_map we just allocated then it also observes
4907 			 * the corresponding tgid_map_max value.
4908 			 */
4909 			smp_store_release(&tgid_map, map);
4910 		}
4911 		if (!tgid_map) {
4912 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4913 			return -ENOMEM;
4914 		}
4915 
4916 		trace_event_enable_tgid_record(enabled);
4917 	}
4918 
4919 	if (mask == TRACE_ITER_EVENT_FORK)
4920 		trace_event_follow_fork(tr, enabled);
4921 
4922 	if (mask == TRACE_ITER_FUNC_FORK)
4923 		ftrace_pid_follow_fork(tr, enabled);
4924 
4925 	if (mask == TRACE_ITER_OVERWRITE) {
4926 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
4927 #ifdef CONFIG_TRACER_MAX_TRACE
4928 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4929 #endif
4930 	}
4931 
4932 	if (mask == TRACE_ITER_PRINTK) {
4933 		trace_printk_start_stop_comm(enabled);
4934 		trace_printk_control(enabled);
4935 	}
4936 
4937 	return 0;
4938 }
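
/*
 * Minimal usage sketch (illustrative): flags are toggled one mask bit at
 * a time with trace_types_lock held, as trace_set_options() below does
 * (the RECORD_CMD/RECORD_TGID bits additionally require event_mutex, as
 * asserted above):
 *
 *	mutex_lock(&trace_types_lock);
 *	set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 0);
 *	mutex_unlock(&trace_types_lock);
 */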
4939 
4940 int trace_set_options(struct trace_array *tr, char *option)
4941 {
4942 	char *cmp;
4943 	int neg = 0;
4944 	int ret;
4945 	size_t orig_len = strlen(option);
4946 	int len;
4947 
4948 	cmp = strstrip(option);
4949 
4950 	len = str_has_prefix(cmp, "no");
4951 	if (len)
4952 		neg = 1;
4953 
4954 	cmp += len;
4955 
4956 	mutex_lock(&event_mutex);
4957 	mutex_lock(&trace_types_lock);
4958 
4959 	ret = match_string(trace_options, -1, cmp);
4960 	/* If no option could be set, test the specific tracer options */
4961 	if (ret < 0)
4962 		ret = set_tracer_option(tr, cmp, neg);
4963 	else
4964 		ret = set_tracer_flag(tr, 1 << ret, !neg);
4965 
4966 	mutex_unlock(&trace_types_lock);
4967 	mutex_unlock(&event_mutex);
4968 
4969 	/*
4970 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
4971 	 * turn it back into a space.
4972 	 */
4973 	if (orig_len > strlen(option))
4974 		option[strlen(option)] = ' ';
4975 
4976 	return ret;
4977 }
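
/*
 * Minimal usage sketch (illustrative): the option string is a flag name,
 * optionally prefixed with "no" to clear it. The buffer must be writable,
 * since strstrip() may modify it:
 *
 *	char opt[] = "nooverwrite";
 *
 *	trace_set_options(&global_trace, opt);
 */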
4978 
4979 static void __init apply_trace_boot_options(void)
4980 {
4981 	char *buf = trace_boot_options_buf;
4982 	char *option;
4983 
4984 	while (true) {
4985 		option = strsep(&buf, ",");
4986 
4987 		if (!option)
4988 			break;
4989 
4990 		if (*option)
4991 			trace_set_options(&global_trace, option);
4992 
4993 		/* Put back the comma to allow this to be called again */
4994 		if (buf)
4995 			*(buf - 1) = ',';
4996 	}
4997 }
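
/*
 * trace_boot_options_buf is filled from the "trace_options=" kernel
 * command line parameter earlier in this file; an illustrative boot
 * line would be:
 *
 *	trace_options=sym-addr,nooverwrite
 */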
4998 
4999 static ssize_t
5000 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5001 			size_t cnt, loff_t *ppos)
5002 {
5003 	struct seq_file *m = filp->private_data;
5004 	struct trace_array *tr = m->private;
5005 	char buf[64];
5006 	int ret;
5007 
5008 	if (cnt >= sizeof(buf))
5009 		return -EINVAL;
5010 
5011 	if (copy_from_user(buf, ubuf, cnt))
5012 		return -EFAULT;
5013 
5014 	buf[cnt] = 0;
5015 
5016 	ret = trace_set_options(tr, buf);
5017 	if (ret < 0)
5018 		return ret;
5019 
5020 	*ppos += cnt;
5021 
5022 	return cnt;
5023 }
5024 
5025 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5026 {
5027 	struct trace_array *tr = inode->i_private;
5028 	int ret;
5029 
5030 	ret = tracing_check_open_get_tr(tr);
5031 	if (ret)
5032 		return ret;
5033 
5034 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5035 	if (ret < 0)
5036 		trace_array_put(tr);
5037 
5038 	return ret;
5039 }
5040 
5041 static const struct file_operations tracing_iter_fops = {
5042 	.open		= tracing_trace_options_open,
5043 	.read		= seq_read,
5044 	.llseek		= seq_lseek,
5045 	.release	= tracing_single_release_tr,
5046 	.write		= tracing_trace_options_write,
5047 };
5048 
5049 static const char readme_msg[] =
5050 	"tracing mini-HOWTO:\n\n"
5051 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5052 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5053 	" Important files:\n"
5054 	"  trace\t\t\t- The static contents of the buffer\n"
5055 	"\t\t\t  To clear the buffer, write into this file: echo > trace\n"
5056 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5057 	"  current_tracer\t- function and latency tracers\n"
5058 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5059 	"  error_log\t- error log for failed commands (that support it)\n"
5060 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5061 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5062 	"  trace_clock\t\t- change the clock used to order events\n"
5063 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5064 	"      global:   Synced across CPUs but slows tracing down.\n"
5065 	"     counter:   Not a clock, but just an increment\n"
5066 	"      uptime:   Jiffy counter from time of boot\n"
5067 	"        perf:   Same clock that perf events use\n"
5068 #ifdef CONFIG_X86_64
5069 	"     x86-tsc:   TSC cycle counter\n"
5070 #endif
5071 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5072 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5073 	"    absolute:   Absolute (standalone) timestamp\n"
5074 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5075 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5076 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5077 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5078 	"\t\t\t  Remove sub-buffer with rmdir\n"
5079 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5080 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5081 	"\t\t\t  option name\n"
5082 	"  saved_cmdlines_size\t- echo the number of comm-pid pairs to cache in here\n"
5083 #ifdef CONFIG_DYNAMIC_FTRACE
5084 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5085 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5086 	"\t\t\t  functions\n"
5087 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5088 	"\t     modules: Can select a group via module\n"
5089 	"\t      Format: :mod:<module-name>\n"
5090 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5091 	"\t    triggers: a command to perform when function is hit\n"
5092 	"\t      Format: <function>:<trigger>[:count]\n"
5093 	"\t     trigger: traceon, traceoff\n"
5094 	"\t\t      enable_event:<system>:<event>\n"
5095 	"\t\t      disable_event:<system>:<event>\n"
5096 #ifdef CONFIG_STACKTRACE
5097 	"\t\t      stacktrace\n"
5098 #endif
5099 #ifdef CONFIG_TRACER_SNAPSHOT
5100 	"\t\t      snapshot\n"
5101 #endif
5102 	"\t\t      dump\n"
5103 	"\t\t      cpudump\n"
5104 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5105 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5106 	"\t     The first one will disable tracing every time do_fault is hit\n"
5107 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5108 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5109 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5110 	"\t       the counter will not decrement. It only decrements when the\n"
5111 	"\t       trigger did work\n"
5112 	"\t     To remove trigger without count:\n"
5113 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5114 	"\t     To remove trigger with a count:\n"
5115 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5116 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5117 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5118 	"\t    modules: Can select a group via module command :mod:\n"
5119 	"\t    Does not accept triggers\n"
5120 #endif /* CONFIG_DYNAMIC_FTRACE */
5121 #ifdef CONFIG_FUNCTION_TRACER
5122 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5123 	"\t\t    (function)\n"
5124 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5125 	"\t\t    (function)\n"
5126 #endif
5127 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5128 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5129 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5130 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5131 #endif
5132 #ifdef CONFIG_TRACER_SNAPSHOT
5133 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5134 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5135 	"\t\t\t  information\n"
5136 #endif
5137 #ifdef CONFIG_STACK_TRACER
5138 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5139 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5140 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5141 	"\t\t\t  new trace)\n"
5142 #ifdef CONFIG_DYNAMIC_FTRACE
5143 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5144 	"\t\t\t  traces\n"
5145 #endif
5146 #endif /* CONFIG_STACK_TRACER */
5147 #ifdef CONFIG_DYNAMIC_EVENTS
5148 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5149 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5150 #endif
5151 #ifdef CONFIG_KPROBE_EVENTS
5152 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5153 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5154 #endif
5155 #ifdef CONFIG_UPROBE_EVENTS
5156 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5157 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5158 #endif
5159 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5160 	"\t  accepts: event-definitions (one definition per line)\n"
5161 	"\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
5162 	"\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5163 #ifdef CONFIG_HIST_TRIGGERS
5164 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5165 #endif
5166 	"\t           -:[<group>/]<event>\n"
5167 #ifdef CONFIG_KPROBE_EVENTS
5168 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5169   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5170 #endif
5171 #ifdef CONFIG_UPROBE_EVENTS
5172   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5173 #endif
5174 	"\t     args: <name>=fetcharg[:type]\n"
5175 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5176 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5177 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5178 #else
5179 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5180 #endif
5181 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5182 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5183 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5184 	"\t           <type>\\[<array-size>\\]\n"
5185 #ifdef CONFIG_HIST_TRIGGERS
5186 	"\t    field: <stype> <name>;\n"
5187 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5188 	"\t           [unsigned] char/int/long\n"
5189 #endif
5190 #endif
5191 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5192 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5193 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5194 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5195 	"\t\t\t  events\n"
5196 	"      filter\t\t- If set, only events passing filter are traced\n"
5197 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5198 	"\t\t\t  <event>:\n"
5199 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5200 	"      filter\t\t- If set, only events passing filter are traced\n"
5201 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5202 	"\t    Format: <trigger>[:count][if <filter>]\n"
5203 	"\t   trigger: traceon, traceoff\n"
5204 	"\t            enable_event:<system>:<event>\n"
5205 	"\t            disable_event:<system>:<event>\n"
5206 #ifdef CONFIG_HIST_TRIGGERS
5207 	"\t            enable_hist:<system>:<event>\n"
5208 	"\t            disable_hist:<system>:<event>\n"
5209 #endif
5210 #ifdef CONFIG_STACKTRACE
5211 	"\t\t    stacktrace\n"
5212 #endif
5213 #ifdef CONFIG_TRACER_SNAPSHOT
5214 	"\t\t    snapshot\n"
5215 #endif
5216 #ifdef CONFIG_HIST_TRIGGERS
5217 	"\t\t    hist (see below)\n"
5218 #endif
5219 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5220 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5221 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5222 	"\t                  events/block/block_unplug/trigger\n"
5223 	"\t   The first disables tracing every time block_unplug is hit.\n"
5224 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5225 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5226 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5227 	"\t   Like function triggers, the counter is only decremented if it\n"
5228 	"\t    enabled or disabled tracing.\n"
5229 	"\t   To remove a trigger without a count:\n"
5230 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5231 	"\t   To remove a trigger with a count:\n"
5232 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5233 	"\t   Filters can be ignored when removing a trigger.\n"
5234 #ifdef CONFIG_HIST_TRIGGERS
5235 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5236 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5237 	"\t            [:values=<field1[,field2,...]>]\n"
5238 	"\t            [:sort=<field1[,field2,...]>]\n"
5239 	"\t            [:size=#entries]\n"
5240 	"\t            [:pause][:continue][:clear]\n"
5241 	"\t            [:name=histname1]\n"
5242 	"\t            [:<handler>.<action>]\n"
5243 	"\t            [if <filter>]\n\n"
5244 	"\t    Note, special fields can be used as well:\n"
5245 	"\t            common_timestamp - to record current timestamp\n"
5246 	"\t            common_cpu - to record the CPU the event happened on\n"
5247 	"\n"
5248 	"\t    When a matching event is hit, an entry is added to a hash\n"
5249 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5250 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5251 	"\t    correspond to fields in the event's format description.  Keys\n"
5252 	"\t    can be any field, or the special string 'stacktrace'.\n"
5253 	"\t    Compound keys consisting of up to two fields can be specified\n"
5254 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5255 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5256 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5257 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5258 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5259 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5260 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5261 	"\t    its histogram data will be shared with other triggers of the\n"
5262 	"\t    same name, and trigger hits will update this common data.\n\n"
5263 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5264 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5265 	"\t    triggers attached to an event, there will be a table for each\n"
5266 	"\t    trigger in the output.  The table displayed for a named\n"
5267 	"\t    trigger will be the same as any other instance having the\n"
5268 	"\t    same name.  The default format used to display a given field\n"
5269 	"\t    can be modified by appending any of the following modifiers\n"
5270 	"\t    to the field name, as applicable:\n\n"
5271 	"\t            .hex        display a number as a hex value\n"
5272 	"\t            .sym        display an address as a symbol\n"
5273 	"\t            .sym-offset display an address as a symbol and offset\n"
5274 	"\t            .execname   display a common_pid as a program name\n"
5275 	"\t            .syscall    display a syscall id as a syscall name\n"
5276 	"\t            .log2       display log2 value rather than raw number\n"
5277 	"\t            .usecs      display a common_timestamp in microseconds\n\n"
5278 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5279 	"\t    trigger or to start a hist trigger but not log any events\n"
5280 	"\t    until told to do so.  'continue' can be used to start or\n"
5281 	"\t    restart a paused hist trigger.\n\n"
5282 	"\t    The 'clear' parameter will clear the contents of a running\n"
5283 	"\t    hist trigger and leave its current paused/active state\n"
5284 	"\t    unchanged.\n\n"
5285 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5286 	"\t    have one event conditionally start and stop another event's\n"
5287 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5288 	"\t    the enable_event and disable_event triggers.\n\n"
5289 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5290 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5291 	"\t        <handler>.<action>\n\n"
5292 	"\t    The available handlers are:\n\n"
5293 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5294 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5295 	"\t        onchange(var)            - invoke action if var changes\n\n"
5296 	"\t    The available actions are:\n\n"
5297 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5298 	"\t        save(field,...)                      - save current event fields\n"
5299 #ifdef CONFIG_TRACER_SNAPSHOT
5300 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5301 #endif
5302 #ifdef CONFIG_SYNTH_EVENTS
5303 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5304 	"\t  Write into this file to define/undefine new synthetic events.\n"
5305 	"\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5306 #endif
5307 #endif
5308 ;
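
/*
 * Illustrative use of the interfaces documented above (the probe name,
 * function and group below are only examples; which files exist depends
 * on the config options noted in the text):
 *
 *   echo 'p:myprobe do_sys_open' >> kprobe_events
 *   echo 1 > events/kprobes/myprobe/enable
 *   echo 'hist:keys=common_pid' >> events/kprobes/myprobe/trigger
 */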
5309 
5310 static ssize_t
5311 tracing_readme_read(struct file *filp, char __user *ubuf,
5312 		       size_t cnt, loff_t *ppos)
5313 {
5314 	return simple_read_from_buffer(ubuf, cnt, ppos,
5315 					readme_msg, strlen(readme_msg));
5316 }
5317 
5318 static const struct file_operations tracing_readme_fops = {
5319 	.open		= tracing_open_generic,
5320 	.read		= tracing_readme_read,
5321 	.llseek		= generic_file_llseek,
5322 };
5323 
5324 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5325 {
5326 	int pid = ++(*pos);
5327 
5328 	return trace_find_tgid_ptr(pid);
5329 }
5330 
5331 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5332 {
5333 	int pid = *pos;
5334 
5335 	return trace_find_tgid_ptr(pid);
5336 }
5337 
5338 static void saved_tgids_stop(struct seq_file *m, void *v)
5339 {
5340 }
5341 
5342 static int saved_tgids_show(struct seq_file *m, void *v)
5343 {
5344 	int *entry = (int *)v;
5345 	int pid = entry - tgid_map;
5346 	int tgid = *entry;
5347 
5348 	if (tgid == 0)
5349 		return SEQ_SKIP;
5350 
5351 	seq_printf(m, "%d %d\n", pid, tgid);
5352 	return 0;
5353 }
5354 
5355 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5356 	.start		= saved_tgids_start,
5357 	.stop		= saved_tgids_stop,
5358 	.next		= saved_tgids_next,
5359 	.show		= saved_tgids_show,
5360 };
5361 
5362 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5363 {
5364 	int ret;
5365 
5366 	ret = tracing_check_open_get_tr(NULL);
5367 	if (ret)
5368 		return ret;
5369 
5370 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5371 }
5372 
5373 
5374 static const struct file_operations tracing_saved_tgids_fops = {
5375 	.open		= tracing_saved_tgids_open,
5376 	.read		= seq_read,
5377 	.llseek		= seq_lseek,
5378 	.release	= seq_release,
5379 };
5380 
5381 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5382 {
5383 	unsigned int *ptr = v;
5384 
5385 	if (*pos || m->count)
5386 		ptr++;
5387 
5388 	(*pos)++;
5389 
5390 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5391 	     ptr++) {
5392 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5393 			continue;
5394 
5395 		return ptr;
5396 	}
5397 
5398 	return NULL;
5399 }
5400 
5401 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5402 {
5403 	void *v;
5404 	loff_t l = 0;
5405 
5406 	preempt_disable();
5407 	arch_spin_lock(&trace_cmdline_lock);
5408 
5409 	v = &savedcmd->map_cmdline_to_pid[0];
5410 	while (l <= *pos) {
5411 		v = saved_cmdlines_next(m, v, &l);
5412 		if (!v)
5413 			return NULL;
5414 	}
5415 
5416 	return v;
5417 }
5418 
5419 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5420 {
5421 	arch_spin_unlock(&trace_cmdline_lock);
5422 	preempt_enable();
5423 }
5424 
5425 static int saved_cmdlines_show(struct seq_file *m, void *v)
5426 {
5427 	char buf[TASK_COMM_LEN];
5428 	unsigned int *pid = v;
5429 
5430 	__trace_find_cmdline(*pid, buf);
5431 	seq_printf(m, "%d %s\n", *pid, buf);
5432 	return 0;
5433 }
5434 
5435 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5436 	.start		= saved_cmdlines_start,
5437 	.next		= saved_cmdlines_next,
5438 	.stop		= saved_cmdlines_stop,
5439 	.show		= saved_cmdlines_show,
5440 };
5441 
5442 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5443 {
5444 	int ret;
5445 
5446 	ret = tracing_check_open_get_tr(NULL);
5447 	if (ret)
5448 		return ret;
5449 
5450 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5451 }
5452 
5453 static const struct file_operations tracing_saved_cmdlines_fops = {
5454 	.open		= tracing_saved_cmdlines_open,
5455 	.read		= seq_read,
5456 	.llseek		= seq_lseek,
5457 	.release	= seq_release,
5458 };
5459 
5460 static ssize_t
5461 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5462 				 size_t cnt, loff_t *ppos)
5463 {
5464 	char buf[64];
5465 	int r;
5466 
5467 	arch_spin_lock(&trace_cmdline_lock);
5468 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5469 	arch_spin_unlock(&trace_cmdline_lock);
5470 
5471 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5472 }
5473 
5474 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5475 {
5476 	kfree(s->saved_cmdlines);
5477 	kfree(s->map_cmdline_to_pid);
5478 	kfree(s);
5479 }
5480 
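/*
 * Allocate a new saved_cmdlines buffer with room for @val entries, swap it
 * in under trace_cmdline_lock, and free the old buffer.
 */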
5481 static int tracing_resize_saved_cmdlines(unsigned int val)
5482 {
5483 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
5484 
5485 	s = kmalloc(sizeof(*s), GFP_KERNEL);
5486 	if (!s)
5487 		return -ENOMEM;
5488 
5489 	if (allocate_cmdlines_buffer(val, s) < 0) {
5490 		kfree(s);
5491 		return -ENOMEM;
5492 	}
5493 
5494 	arch_spin_lock(&trace_cmdline_lock);
5495 	savedcmd_temp = savedcmd;
5496 	savedcmd = s;
5497 	arch_spin_unlock(&trace_cmdline_lock);
5498 	free_saved_cmdlines_buffer(savedcmd_temp);
5499 
5500 	return 0;
5501 }
5502 
5503 static ssize_t
5504 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5505 				  size_t cnt, loff_t *ppos)
5506 {
5507 	unsigned long val;
5508 	int ret;
5509 
5510 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5511 	if (ret)
5512 		return ret;
5513 
5514 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
5515 	if (!val || val > PID_MAX_DEFAULT)
5516 		return -EINVAL;
5517 
5518 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
5519 	if (ret < 0)
5520 		return ret;
5521 
5522 	*ppos += cnt;
5523 
5524 	return cnt;
5525 }
5526 
5527 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5528 	.open		= tracing_open_generic,
5529 	.read		= tracing_saved_cmdlines_size_read,
5530 	.write		= tracing_saved_cmdlines_size_write,
5531 };
5532 
5533 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
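/*
 * If @ptr is not a real map entry (its eval_string is NULL), it is the tail
 * of the current array: follow tail.next to the next array and skip over
 * that array's head item.  Returns NULL when the end of the list is hit.
 */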
5534 static union trace_eval_map_item *
5535 update_eval_map(union trace_eval_map_item *ptr)
5536 {
5537 	if (!ptr->map.eval_string) {
5538 		if (ptr->tail.next) {
5539 			ptr = ptr->tail.next;
5540 			/* Set ptr to the next real item (skip head) */
5541 			ptr++;
5542 		} else
5543 			return NULL;
5544 	}
5545 	return ptr;
5546 }
5547 
5548 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5549 {
5550 	union trace_eval_map_item *ptr = v;
5551 
5552 	/*
5553 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5554 	 * This really should never happen.
5555 	 */
5556 	(*pos)++;
5557 	ptr = update_eval_map(ptr);
5558 	if (WARN_ON_ONCE(!ptr))
5559 		return NULL;
5560 
5561 	ptr++;
5562 	ptr = update_eval_map(ptr);
5563 
5564 	return ptr;
5565 }
5566 
5567 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5568 {
5569 	union trace_eval_map_item *v;
5570 	loff_t l = 0;
5571 
5572 	mutex_lock(&trace_eval_mutex);
5573 
5574 	v = trace_eval_maps;
5575 	if (v)
5576 		v++;
5577 
5578 	while (v && l < *pos) {
5579 		v = eval_map_next(m, v, &l);
5580 	}
5581 
5582 	return v;
5583 }
5584 
5585 static void eval_map_stop(struct seq_file *m, void *v)
5586 {
5587 	mutex_unlock(&trace_eval_mutex);
5588 }
5589 
5590 static int eval_map_show(struct seq_file *m, void *v)
5591 {
5592 	union trace_eval_map_item *ptr = v;
5593 
5594 	seq_printf(m, "%s %ld (%s)\n",
5595 		   ptr->map.eval_string, ptr->map.eval_value,
5596 		   ptr->map.system);
5597 
5598 	return 0;
5599 }
5600 
5601 static const struct seq_operations tracing_eval_map_seq_ops = {
5602 	.start		= eval_map_start,
5603 	.next		= eval_map_next,
5604 	.stop		= eval_map_stop,
5605 	.show		= eval_map_show,
5606 };
5607 
5608 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5609 {
5610 	int ret;
5611 
5612 	ret = tracing_check_open_get_tr(NULL);
5613 	if (ret)
5614 		return ret;
5615 
5616 	return seq_open(filp, &tracing_eval_map_seq_ops);
5617 }
5618 
5619 static const struct file_operations tracing_eval_map_fops = {
5620 	.open		= tracing_eval_map_open,
5621 	.read		= seq_read,
5622 	.llseek		= seq_lseek,
5623 	.release	= seq_release,
5624 };
5625 
5626 static inline union trace_eval_map_item *
5627 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5628 {
5629 	/* Return tail of array given the head */
5630 	return ptr + ptr->head.length + 1;
5631 }
5632 
5633 static void
5634 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5635 			   int len)
5636 {
5637 	struct trace_eval_map **stop;
5638 	struct trace_eval_map **map;
5639 	union trace_eval_map_item *map_array;
5640 	union trace_eval_map_item *ptr;
5641 
5642 	stop = start + len;
5643 
5644 	/*
5645 	 * The trace_eval_maps contains the map plus a head and tail item,
5646 	 * where the head holds the module and length of array, and the
5647 	 * tail holds a pointer to the next list.
5648 	 */
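	/*
	 * For example, with len == 2 the allocated array is laid out as:
	 *
	 *   [ head (mod, length) | map[0] | map[1] | tail (next or NULL) ]
	 */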
5649 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5650 	if (!map_array) {
5651 		pr_warn("Unable to allocate trace eval mapping\n");
5652 		return;
5653 	}
5654 
5655 	mutex_lock(&trace_eval_mutex);
5656 
5657 	if (!trace_eval_maps)
5658 		trace_eval_maps = map_array;
5659 	else {
5660 		ptr = trace_eval_maps;
5661 		for (;;) {
5662 			ptr = trace_eval_jmp_to_tail(ptr);
5663 			if (!ptr->tail.next)
5664 				break;
5665 			ptr = ptr->tail.next;
5666 
5667 		}
5668 		ptr->tail.next = map_array;
5669 	}
5670 	map_array->head.mod = mod;
5671 	map_array->head.length = len;
5672 	map_array++;
5673 
5674 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5675 		map_array->map = **map;
5676 		map_array++;
5677 	}
5678 	memset(map_array, 0, sizeof(*map_array));
5679 
5680 	mutex_unlock(&trace_eval_mutex);
5681 }
5682 
5683 static void trace_create_eval_file(struct dentry *d_tracer)
5684 {
5685 	trace_create_file("eval_map", 0444, d_tracer,
5686 			  NULL, &tracing_eval_map_fops);
5687 }
5688 
5689 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5690 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5691 static inline void trace_insert_eval_map_file(struct module *mod,
5692 			      struct trace_eval_map **start, int len) { }
5693 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5694 
5695 static void trace_insert_eval_map(struct module *mod,
5696 				  struct trace_eval_map **start, int len)
5697 {
5698 	struct trace_eval_map **map;
5699 
5700 	if (len <= 0)
5701 		return;
5702 
5703 	map = start;
5704 
5705 	trace_event_eval_update(map, len);
5706 
5707 	trace_insert_eval_map_file(mod, start, len);
5708 }
5709 
5710 static ssize_t
5711 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5712 		       size_t cnt, loff_t *ppos)
5713 {
5714 	struct trace_array *tr = filp->private_data;
5715 	char buf[MAX_TRACER_SIZE+2];
5716 	int r;
5717 
5718 	mutex_lock(&trace_types_lock);
5719 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5720 	mutex_unlock(&trace_types_lock);
5721 
5722 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5723 }
5724 
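/* Reset the per-CPU buffers of @tr and call the tracer's init callback. */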
5725 int tracer_init(struct tracer *t, struct trace_array *tr)
5726 {
5727 	tracing_reset_online_cpus(&tr->array_buffer);
5728 	return t->init(tr);
5729 }
5730 
5731 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5732 {
5733 	int cpu;
5734 
5735 	for_each_tracing_cpu(cpu)
5736 		per_cpu_ptr(buf->data, cpu)->entries = val;
5737 }
5738 
5739 #ifdef CONFIG_TRACER_MAX_TRACE
5740 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5741 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5742 					struct array_buffer *size_buf, int cpu_id)
5743 {
5744 	int cpu, ret = 0;
5745 
5746 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5747 		for_each_tracing_cpu(cpu) {
5748 			ret = ring_buffer_resize(trace_buf->buffer,
5749 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5750 			if (ret < 0)
5751 				break;
5752 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5753 				per_cpu_ptr(size_buf->data, cpu)->entries;
5754 		}
5755 	} else {
5756 		ret = ring_buffer_resize(trace_buf->buffer,
5757 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5758 		if (ret == 0)
5759 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5760 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5761 	}
5762 
5763 	return ret;
5764 }
5765 #endif /* CONFIG_TRACER_MAX_TRACE */
5766 
5767 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5768 					unsigned long size, int cpu)
5769 {
5770 	int ret;
5771 
5772 	/*
5773 	 * If kernel or user changes the size of the ring buffer
5774 	 * we use the size that was given, and we can forget about
5775 	 * expanding it later.
5776 	 */
5777 	ring_buffer_expanded = true;
5778 
5779 	/* May be called before buffers are initialized */
5780 	if (!tr->array_buffer.buffer)
5781 		return 0;
5782 
5783 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5784 	if (ret < 0)
5785 		return ret;
5786 
5787 #ifdef CONFIG_TRACER_MAX_TRACE
5788 	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5789 	    !tr->current_trace->use_max_tr)
5790 		goto out;
5791 
5792 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5793 	if (ret < 0) {
5794 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5795 						     &tr->array_buffer, cpu);
5796 		if (r < 0) {
5797 			/*
5798 			 * AARGH! We are left with different
5799 			 * size max buffer!!!!
5800 			 * The max buffer is our "snapshot" buffer.
5801 			 * When a tracer needs a snapshot (one of the
5802 			 * latency tracers), it swaps the max buffer
5803 			 * with the saved snapshot. We succeeded in updating
5804 			 * the size of the main buffer, but failed to
5805 			 * update the size of the max buffer. But when we tried
5806 			 * to reset the main buffer to the original size, we
5807 			 * failed there too. This is very unlikely to
5808 			 * happen, but if it does, warn and kill all
5809 			 * tracing.
5810 			 */
5811 			WARN_ON(1);
5812 			tracing_disabled = 1;
5813 		}
5814 		return ret;
5815 	}
5816 
5817 	if (cpu == RING_BUFFER_ALL_CPUS)
5818 		set_buffer_entries(&tr->max_buffer, size);
5819 	else
5820 		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5821 
5822  out:
5823 #endif /* CONFIG_TRACER_MAX_TRACE */
5824 
5825 	if (cpu == RING_BUFFER_ALL_CPUS)
5826 		set_buffer_entries(&tr->array_buffer, size);
5827 	else
5828 		per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
5829 
5830 	return ret;
5831 }
5832 
5833 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5834 				  unsigned long size, int cpu_id)
5835 {
5836 	int ret = size;
5837 
5838 	mutex_lock(&trace_types_lock);
5839 
5840 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5841 		/* make sure, this cpu is enabled in the mask */
5842 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5843 			ret = -EINVAL;
5844 			goto out;
5845 		}
5846 	}
5847 
5848 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5849 	if (ret < 0)
5850 		ret = -ENOMEM;
5851 
5852 out:
5853 	mutex_unlock(&trace_types_lock);
5854 
5855 	return ret;
5856 }
5857 
5858 
5859 /**
5860  * tracing_update_buffers - used by tracing facility to expand ring buffers
5861  *
5862  * To save memory when tracing is configured in but never used, the
5863  * ring buffers are initially set to a minimum size. But once a user
5864  * starts to use the tracing facility, then they need to grow to
5865  * their default size.
5866  *
5867  * This function is to be called when a tracer is about to be used.
5868  */
5869 int tracing_update_buffers(void)
5870 {
5871 	int ret = 0;
5872 
5873 	mutex_lock(&trace_types_lock);
5874 	if (!ring_buffer_expanded)
5875 		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5876 						RING_BUFFER_ALL_CPUS);
5877 	mutex_unlock(&trace_types_lock);
5878 
5879 	return ret;
5880 }
5881 
5882 struct trace_option_dentry;
5883 
5884 static void
5885 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5886 
5887 /*
5888  * Used to clear out the tracer before deletion of an instance.
5889  * Must have trace_types_lock held.
5890  */
5891 static void tracing_set_nop(struct trace_array *tr)
5892 {
5893 	if (tr->current_trace == &nop_trace)
5894 		return;
5895 
5896 	tr->current_trace->enabled--;
5897 
5898 	if (tr->current_trace->reset)
5899 		tr->current_trace->reset(tr);
5900 
5901 	tr->current_trace = &nop_trace;
5902 }
5903 
5904 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5905 {
5906 	/* Only enable if the directory has been created already. */
5907 	if (!tr->dir)
5908 		return;
5909 
5910 	create_trace_option_files(tr, t);
5911 }
5912 
5913 int tracing_set_tracer(struct trace_array *tr, const char *buf)
5914 {
5915 	struct tracer *t;
5916 #ifdef CONFIG_TRACER_MAX_TRACE
5917 	bool had_max_tr;
5918 #endif
5919 	int ret = 0;
5920 
5921 	mutex_lock(&trace_types_lock);
5922 
5923 	if (!ring_buffer_expanded) {
5924 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5925 						RING_BUFFER_ALL_CPUS);
5926 		if (ret < 0)
5927 			goto out;
5928 		ret = 0;
5929 	}
5930 
5931 	for (t = trace_types; t; t = t->next) {
5932 		if (strcmp(t->name, buf) == 0)
5933 			break;
5934 	}
5935 	if (!t) {
5936 		ret = -EINVAL;
5937 		goto out;
5938 	}
5939 	if (t == tr->current_trace)
5940 		goto out;
5941 
5942 #ifdef CONFIG_TRACER_SNAPSHOT
5943 	if (t->use_max_tr) {
5944 		arch_spin_lock(&tr->max_lock);
5945 		if (tr->cond_snapshot)
5946 			ret = -EBUSY;
5947 		arch_spin_unlock(&tr->max_lock);
5948 		if (ret)
5949 			goto out;
5950 	}
5951 #endif
5952 	/* Some tracers won't work on kernel command line */
5953 	if (system_state < SYSTEM_RUNNING && t->noboot) {
5954 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5955 			t->name);
5956 		goto out;
5957 	}
5958 
5959 	/* Some tracers are only allowed for the top level buffer */
5960 	if (!trace_ok_for_array(t, tr)) {
5961 		ret = -EINVAL;
5962 		goto out;
5963 	}
5964 
5965 	/* If trace pipe files are being read, we can't change the tracer */
5966 	if (tr->trace_ref) {
5967 		ret = -EBUSY;
5968 		goto out;
5969 	}
5970 
5971 	trace_branch_disable();
5972 
5973 	tr->current_trace->enabled--;
5974 
5975 	if (tr->current_trace->reset)
5976 		tr->current_trace->reset(tr);
5977 
5978 	/* Current trace needs to be nop_trace before synchronize_rcu */
5979 	tr->current_trace = &nop_trace;
5980 
5981 #ifdef CONFIG_TRACER_MAX_TRACE
5982 	had_max_tr = tr->allocated_snapshot;
5983 
5984 	if (had_max_tr && !t->use_max_tr) {
5985 		/*
5986 		 * We need to make sure that the update_max_tr sees that
5987 		 * current_trace changed to nop_trace to keep it from
5988 		 * swapping the buffers after we resize it.
5989 		 * The update_max_tr is called with interrupts disabled,
5990 		 * so a synchronize_rcu() is sufficient.
5991 		 */
5992 		synchronize_rcu();
5993 		free_snapshot(tr);
5994 	}
5995 #endif
5996 
5997 #ifdef CONFIG_TRACER_MAX_TRACE
5998 	if (t->use_max_tr && !had_max_tr) {
5999 		ret = tracing_alloc_snapshot_instance(tr);
6000 		if (ret < 0)
6001 			goto out;
6002 	}
6003 #endif
6004 
6005 	if (t->init) {
6006 		ret = tracer_init(t, tr);
6007 		if (ret)
6008 			goto out;
6009 	}
6010 
6011 	tr->current_trace = t;
6012 	tr->current_trace->enabled++;
6013 	trace_branch_enable(tr);
6014  out:
6015 	mutex_unlock(&trace_types_lock);
6016 
6017 	return ret;
6018 }
6019 
6020 static ssize_t
6021 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6022 			size_t cnt, loff_t *ppos)
6023 {
6024 	struct trace_array *tr = filp->private_data;
6025 	char buf[MAX_TRACER_SIZE+1];
6026 	int i;
6027 	size_t ret;
6028 	int err;
6029 
6030 	ret = cnt;
6031 
6032 	if (cnt > MAX_TRACER_SIZE)
6033 		cnt = MAX_TRACER_SIZE;
6034 
6035 	if (copy_from_user(buf, ubuf, cnt))
6036 		return -EFAULT;
6037 
6038 	buf[cnt] = 0;
6039 
6040 	/* strip ending whitespace. */
6041 	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6042 		buf[i] = 0;
6043 
6044 	err = tracing_set_tracer(tr, buf);
6045 	if (err)
6046 		return err;
6047 
6048 	*ppos += ret;
6049 
6050 	return ret;
6051 }
6052 
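/*
 * The files backed by these helpers are read and written in microseconds,
 * while the value itself is stored in nanoseconds (a stored value of -1 is
 * reported as -1).
 */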
6053 static ssize_t
6054 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6055 		   size_t cnt, loff_t *ppos)
6056 {
6057 	char buf[64];
6058 	int r;
6059 
6060 	r = snprintf(buf, sizeof(buf), "%ld\n",
6061 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6062 	if (r > sizeof(buf))
6063 		r = sizeof(buf);
6064 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6065 }
6066 
6067 static ssize_t
6068 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6069 		    size_t cnt, loff_t *ppos)
6070 {
6071 	unsigned long val;
6072 	int ret;
6073 
6074 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6075 	if (ret)
6076 		return ret;
6077 
6078 	*ptr = val * 1000;
6079 
6080 	return cnt;
6081 }
6082 
6083 static ssize_t
6084 tracing_thresh_read(struct file *filp, char __user *ubuf,
6085 		    size_t cnt, loff_t *ppos)
6086 {
6087 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6088 }
6089 
6090 static ssize_t
6091 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6092 		     size_t cnt, loff_t *ppos)
6093 {
6094 	struct trace_array *tr = filp->private_data;
6095 	int ret;
6096 
6097 	mutex_lock(&trace_types_lock);
6098 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6099 	if (ret < 0)
6100 		goto out;
6101 
6102 	if (tr->current_trace->update_thresh) {
6103 		ret = tr->current_trace->update_thresh(tr);
6104 		if (ret < 0)
6105 			goto out;
6106 	}
6107 
6108 	ret = cnt;
6109 out:
6110 	mutex_unlock(&trace_types_lock);
6111 
6112 	return ret;
6113 }
6114 
6115 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6116 
6117 static ssize_t
6118 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6119 		     size_t cnt, loff_t *ppos)
6120 {
6121 	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6122 }
6123 
6124 static ssize_t
6125 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6126 		      size_t cnt, loff_t *ppos)
6127 {
6128 	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6129 }
6130 
6131 #endif
6132 
6133 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6134 {
6135 	struct trace_array *tr = inode->i_private;
6136 	struct trace_iterator *iter;
6137 	int ret;
6138 
6139 	ret = tracing_check_open_get_tr(tr);
6140 	if (ret)
6141 		return ret;
6142 
6143 	mutex_lock(&trace_types_lock);
6144 
6145 	/* create a buffer to store the information to pass to userspace */
6146 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6147 	if (!iter) {
6148 		ret = -ENOMEM;
6149 		__trace_array_put(tr);
6150 		goto out;
6151 	}
6152 
6153 	trace_seq_init(&iter->seq);
6154 	iter->trace = tr->current_trace;
6155 
6156 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6157 		ret = -ENOMEM;
6158 		goto fail;
6159 	}
6160 
6161 	/* trace pipe does not show start of buffer */
6162 	cpumask_setall(iter->started);
6163 
6164 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6165 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6166 
6167 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6168 	if (trace_clocks[tr->clock_id].in_ns)
6169 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6170 
6171 	iter->tr = tr;
6172 	iter->array_buffer = &tr->array_buffer;
6173 	iter->cpu_file = tracing_get_cpu(inode);
6174 	mutex_init(&iter->mutex);
6175 	filp->private_data = iter;
6176 
6177 	if (iter->trace->pipe_open)
6178 		iter->trace->pipe_open(iter);
6179 
6180 	nonseekable_open(inode, filp);
6181 
6182 	tr->trace_ref++;
6183 out:
6184 	mutex_unlock(&trace_types_lock);
6185 	return ret;
6186 
6187 fail:
6188 	kfree(iter);
6189 	__trace_array_put(tr);
6190 	mutex_unlock(&trace_types_lock);
6191 	return ret;
6192 }
6193 
6194 static int tracing_release_pipe(struct inode *inode, struct file *file)
6195 {
6196 	struct trace_iterator *iter = file->private_data;
6197 	struct trace_array *tr = inode->i_private;
6198 
6199 	mutex_lock(&trace_types_lock);
6200 
6201 	tr->trace_ref--;
6202 
6203 	if (iter->trace->pipe_close)
6204 		iter->trace->pipe_close(iter);
6205 
6206 	mutex_unlock(&trace_types_lock);
6207 
6208 	free_cpumask_var(iter->started);
6209 	mutex_destroy(&iter->mutex);
6210 	kfree(iter);
6211 
6212 	trace_array_put(tr);
6213 
6214 	return 0;
6215 }
6216 
6217 static __poll_t
6218 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6219 {
6220 	struct trace_array *tr = iter->tr;
6221 
6222 	/* Iterators are static, they should be filled or empty */
6223 	if (trace_buffer_iter(iter, iter->cpu_file))
6224 		return EPOLLIN | EPOLLRDNORM;
6225 
6226 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6227 		/*
6228 		 * Always select as readable when in blocking mode
6229 		 */
6230 		return EPOLLIN | EPOLLRDNORM;
6231 	else
6232 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6233 					     filp, poll_table);
6234 }
6235 
6236 static __poll_t
6237 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6238 {
6239 	struct trace_iterator *iter = filp->private_data;
6240 
6241 	return trace_poll(iter, filp, poll_table);
6242 }
6243 
6244 /* Must be called with iter->mutex held. */
6245 static int tracing_wait_pipe(struct file *filp)
6246 {
6247 	struct trace_iterator *iter = filp->private_data;
6248 	int ret;
6249 
6250 	while (trace_empty(iter)) {
6251 
6252 		if ((filp->f_flags & O_NONBLOCK)) {
6253 			return -EAGAIN;
6254 		}
6255 
6256 		/*
6257 		 * We block until there is something to read, unless tracing
6258 		 * is disabled and we have already read something; in that
6259 		 * case we give an EOF. This allows a user to cat this file,
6260 		 * and then enable tracing. But after we have read something,
6261 		 * we give an EOF when tracing is again disabled.
6262 		 *
6263 		 * iter->pos will be 0 if we haven't read anything.
6264 		 */
6265 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6266 			break;
6267 
6268 		mutex_unlock(&iter->mutex);
6269 
6270 		ret = wait_on_pipe(iter, 0);
6271 
6272 		mutex_lock(&iter->mutex);
6273 
6274 		if (ret)
6275 			return ret;
6276 	}
6277 
6278 	return 1;
6279 }
6280 
6281 /*
6282  * Consumer reader.
6283  */
6284 static ssize_t
6285 tracing_read_pipe(struct file *filp, char __user *ubuf,
6286 		  size_t cnt, loff_t *ppos)
6287 {
6288 	struct trace_iterator *iter = filp->private_data;
6289 	ssize_t sret;
6290 
6291 	/*
6292 	 * Avoid more than one consumer on a single file descriptor
6293 	 * This is just a matter of traces coherency, the ring buffer itself
6294 	 * is protected.
6295 	 */
6296 	mutex_lock(&iter->mutex);
6297 
6298 	/* return any leftover data */
6299 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6300 	if (sret != -EBUSY)
6301 		goto out;
6302 
6303 	trace_seq_init(&iter->seq);
6304 
6305 	if (iter->trace->read) {
6306 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6307 		if (sret)
6308 			goto out;
6309 	}
6310 
6311 waitagain:
6312 	sret = tracing_wait_pipe(filp);
6313 	if (sret <= 0)
6314 		goto out;
6315 
6316 	/* stop when tracing is finished */
6317 	if (trace_empty(iter)) {
6318 		sret = 0;
6319 		goto out;
6320 	}
6321 
6322 	if (cnt >= PAGE_SIZE)
6323 		cnt = PAGE_SIZE - 1;
6324 
6325 	/* reset all but tr, trace, and overruns */
6326 	memset(&iter->seq, 0,
6327 	       sizeof(struct trace_iterator) -
6328 	       offsetof(struct trace_iterator, seq));
6329 	cpumask_clear(iter->started);
6330 	trace_seq_init(&iter->seq);
6331 	iter->pos = -1;
6332 
6333 	trace_event_read_lock();
6334 	trace_access_lock(iter->cpu_file);
6335 	while (trace_find_next_entry_inc(iter) != NULL) {
6336 		enum print_line_t ret;
6337 		int save_len = iter->seq.seq.len;
6338 
6339 		ret = print_trace_line(iter);
6340 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6341 			/* don't print partial lines */
6342 			iter->seq.seq.len = save_len;
6343 			break;
6344 		}
6345 		if (ret != TRACE_TYPE_NO_CONSUME)
6346 			trace_consume(iter);
6347 
6348 		if (trace_seq_used(&iter->seq) >= cnt)
6349 			break;
6350 
6351 		/*
6352 		 * Setting the full flag means we reached the trace_seq buffer
6353 		 * size and we should leave by partial output condition above.
6354 		 * One of the trace_seq_* functions is not used properly.
6355 		 */
6356 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6357 			  iter->ent->type);
6358 	}
6359 	trace_access_unlock(iter->cpu_file);
6360 	trace_event_read_unlock();
6361 
6362 	/* Now copy what we have to the user */
6363 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6364 	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6365 		trace_seq_init(&iter->seq);
6366 
6367 	/*
6368 	 * If there was nothing to send to user, in spite of consuming trace
6369 	 * entries, go back to wait for more entries.
6370 	 */
6371 	if (sret == -EBUSY)
6372 		goto waitagain;
6373 
6374 out:
6375 	mutex_unlock(&iter->mutex);
6376 
6377 	return sret;
6378 }
6379 
6380 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6381 				     unsigned int idx)
6382 {
6383 	__free_page(spd->pages[idx]);
6384 }
6385 
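/*
 * Fill iter->seq (one page) with formatted trace lines, consuming at most
 * @rem bytes worth of output.  Returns the number of bytes of @rem that
 * were not used; 0 means the request is satisfied or the trace is empty.
 */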
6386 static size_t
6387 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6388 {
6389 	size_t count;
6390 	int save_len;
6391 	int ret;
6392 
6393 	/* Seq buffer is page-sized, exactly what we need. */
6394 	for (;;) {
6395 		save_len = iter->seq.seq.len;
6396 		ret = print_trace_line(iter);
6397 
6398 		if (trace_seq_has_overflowed(&iter->seq)) {
6399 			iter->seq.seq.len = save_len;
6400 			break;
6401 		}
6402 
6403 		/*
6404 		 * This should not be hit, because it should only
6405 		 * be set if the iter->seq overflowed. But check it
6406 		 * anyway to be safe.
6407 		 */
6408 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6409 			iter->seq.seq.len = save_len;
6410 			break;
6411 		}
6412 
6413 		count = trace_seq_used(&iter->seq) - save_len;
6414 		if (rem < count) {
6415 			rem = 0;
6416 			iter->seq.seq.len = save_len;
6417 			break;
6418 		}
6419 
6420 		if (ret != TRACE_TYPE_NO_CONSUME)
6421 			trace_consume(iter);
6422 		rem -= count;
6423 		if (!trace_find_next_entry_inc(iter))	{
6424 			rem = 0;
6425 			iter->ent = NULL;
6426 			break;
6427 		}
6428 	}
6429 
6430 	return rem;
6431 }
6432 
6433 static ssize_t tracing_splice_read_pipe(struct file *filp,
6434 					loff_t *ppos,
6435 					struct pipe_inode_info *pipe,
6436 					size_t len,
6437 					unsigned int flags)
6438 {
6439 	struct page *pages_def[PIPE_DEF_BUFFERS];
6440 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6441 	struct trace_iterator *iter = filp->private_data;
6442 	struct splice_pipe_desc spd = {
6443 		.pages		= pages_def,
6444 		.partial	= partial_def,
6445 		.nr_pages	= 0, /* This gets updated below. */
6446 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6447 		.ops		= &default_pipe_buf_ops,
6448 		.spd_release	= tracing_spd_release_pipe,
6449 	};
6450 	ssize_t ret;
6451 	size_t rem;
6452 	unsigned int i;
6453 
6454 	if (splice_grow_spd(pipe, &spd))
6455 		return -ENOMEM;
6456 
6457 	mutex_lock(&iter->mutex);
6458 
6459 	if (iter->trace->splice_read) {
6460 		ret = iter->trace->splice_read(iter, filp,
6461 					       ppos, pipe, len, flags);
6462 		if (ret)
6463 			goto out_err;
6464 	}
6465 
6466 	ret = tracing_wait_pipe(filp);
6467 	if (ret <= 0)
6468 		goto out_err;
6469 
6470 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6471 		ret = -EFAULT;
6472 		goto out_err;
6473 	}
6474 
6475 	trace_event_read_lock();
6476 	trace_access_lock(iter->cpu_file);
6477 
6478 	/* Fill as many pages as possible. */
6479 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6480 		spd.pages[i] = alloc_page(GFP_KERNEL);
6481 		if (!spd.pages[i])
6482 			break;
6483 
6484 		rem = tracing_fill_pipe_page(rem, iter);
6485 
6486 		/* Copy the data into the page, so we can start over. */
6487 		ret = trace_seq_to_buffer(&iter->seq,
6488 					  page_address(spd.pages[i]),
6489 					  trace_seq_used(&iter->seq));
6490 		if (ret < 0) {
6491 			__free_page(spd.pages[i]);
6492 			break;
6493 		}
6494 		spd.partial[i].offset = 0;
6495 		spd.partial[i].len = trace_seq_used(&iter->seq);
6496 
6497 		trace_seq_init(&iter->seq);
6498 	}
6499 
6500 	trace_access_unlock(iter->cpu_file);
6501 	trace_event_read_unlock();
6502 	mutex_unlock(&iter->mutex);
6503 
6504 	spd.nr_pages = i;
6505 
6506 	if (i)
6507 		ret = splice_to_pipe(pipe, &spd);
6508 	else
6509 		ret = 0;
6510 out:
6511 	splice_shrink_spd(&spd);
6512 	return ret;
6513 
6514 out_err:
6515 	mutex_unlock(&iter->mutex);
6516 	goto out;
6517 }
6518 
6519 static ssize_t
6520 tracing_entries_read(struct file *filp, char __user *ubuf,
6521 		     size_t cnt, loff_t *ppos)
6522 {
6523 	struct inode *inode = file_inode(filp);
6524 	struct trace_array *tr = inode->i_private;
6525 	int cpu = tracing_get_cpu(inode);
6526 	char buf[64];
6527 	int r = 0;
6528 	ssize_t ret;
6529 
6530 	mutex_lock(&trace_types_lock);
6531 
6532 	if (cpu == RING_BUFFER_ALL_CPUS) {
6533 		int cpu, buf_size_same;
6534 		unsigned long size;
6535 
6536 		size = 0;
6537 		buf_size_same = 1;
6538 		/* check if all cpu sizes are same */
6539 		for_each_tracing_cpu(cpu) {
6540 			/* fill in the size from first enabled cpu */
6541 			if (size == 0)
6542 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6543 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6544 				buf_size_same = 0;
6545 				break;
6546 			}
6547 		}
6548 
6549 		if (buf_size_same) {
6550 			if (!ring_buffer_expanded)
6551 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6552 					    size >> 10,
6553 					    trace_buf_size >> 10);
6554 			else
6555 				r = sprintf(buf, "%lu\n", size >> 10);
6556 		} else
6557 			r = sprintf(buf, "X\n");
6558 	} else
6559 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6560 
6561 	mutex_unlock(&trace_types_lock);
6562 
6563 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6564 	return ret;
6565 }
6566 
6567 static ssize_t
6568 tracing_entries_write(struct file *filp, const char __user *ubuf,
6569 		      size_t cnt, loff_t *ppos)
6570 {
6571 	struct inode *inode = file_inode(filp);
6572 	struct trace_array *tr = inode->i_private;
6573 	unsigned long val;
6574 	int ret;
6575 
6576 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6577 	if (ret)
6578 		return ret;
6579 
6580 	/* must have at least 1 entry */
6581 	if (!val)
6582 		return -EINVAL;
6583 
6584 	/* value is in KB */
6585 	val <<= 10;
6586 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6587 	if (ret < 0)
6588 		return ret;
6589 
6590 	*ppos += cnt;
6591 
6592 	return cnt;
6593 }
6594 
6595 static ssize_t
6596 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6597 				size_t cnt, loff_t *ppos)
6598 {
6599 	struct trace_array *tr = filp->private_data;
6600 	char buf[64];
6601 	int r, cpu;
6602 	unsigned long size = 0, expanded_size = 0;
6603 
6604 	mutex_lock(&trace_types_lock);
6605 	for_each_tracing_cpu(cpu) {
6606 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6607 		if (!ring_buffer_expanded)
6608 			expanded_size += trace_buf_size >> 10;
6609 	}
6610 	if (ring_buffer_expanded)
6611 		r = sprintf(buf, "%lu\n", size);
6612 	else
6613 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6614 	mutex_unlock(&trace_types_lock);
6615 
6616 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6617 }
6618 
6619 static ssize_t
6620 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6621 			  size_t cnt, loff_t *ppos)
6622 {
6623 	/*
6624 	 * There is no need to read what the user has written, this function
6625 	 * is just to make sure that there is no error when "echo" is used
6626 	 */
6627 
6628 	*ppos += cnt;
6629 
6630 	return cnt;
6631 }
6632 
6633 static int
6634 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6635 {
6636 	struct trace_array *tr = inode->i_private;
6637 
6638 	/* disable tracing ? */
6639 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6640 		tracer_tracing_off(tr);
6641 	/* resize the ring buffer to 0 */
6642 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6643 
6644 	trace_array_put(tr);
6645 
6646 	return 0;
6647 }
6648 
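/*
 * Handle writes to the trace_marker file: record the user string in the
 * ring buffer as a TRACE_PRINT event (adding a trailing newline if one is
 * missing) and run any triggers attached to the trace_marker event.
 */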
6649 static ssize_t
6650 tracing_mark_write(struct file *filp, const char __user *ubuf,
6651 					size_t cnt, loff_t *fpos)
6652 {
6653 	struct trace_array *tr = filp->private_data;
6654 	struct ring_buffer_event *event;
6655 	enum event_trigger_type tt = ETT_NONE;
6656 	struct trace_buffer *buffer;
6657 	struct print_entry *entry;
6658 	unsigned long irq_flags;
6659 	ssize_t written;
6660 	int size;
6661 	int len;
6662 
6663 /* Used in tracing_mark_raw_write() as well */
6664 #define FAULTED_STR "<faulted>"
6665 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6666 
6667 	if (tracing_disabled)
6668 		return -EINVAL;
6669 
6670 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6671 		return -EINVAL;
6672 
6673 	if (cnt > TRACE_BUF_SIZE)
6674 		cnt = TRACE_BUF_SIZE;
6675 
6676 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6677 
6678 	local_save_flags(irq_flags);
6679 	size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6680 
6681 	/* If less than "<faulted>", then make sure we can still add that */
6682 	if (cnt < FAULTED_SIZE)
6683 		size += FAULTED_SIZE - cnt;
6684 
6685 	buffer = tr->array_buffer.buffer;
6686 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6687 					    irq_flags, preempt_count());
6688 	if (unlikely(!event))
6689 		/* Ring buffer disabled, return as if not open for write */
6690 		return -EBADF;
6691 
6692 	entry = ring_buffer_event_data(event);
6693 	entry->ip = _THIS_IP_;
6694 
6695 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6696 	if (len) {
6697 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6698 		cnt = FAULTED_SIZE;
6699 		written = -EFAULT;
6700 	} else
6701 		written = cnt;
6702 
6703 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6704 		/* do not add \n before testing triggers, but add \0 */
6705 		entry->buf[cnt] = '\0';
6706 		tt = event_triggers_call(tr->trace_marker_file, entry, event);
6707 	}
6708 
6709 	if (entry->buf[cnt - 1] != '\n') {
6710 		entry->buf[cnt] = '\n';
6711 		entry->buf[cnt + 1] = '\0';
6712 	} else
6713 		entry->buf[cnt] = '\0';
6714 
6715 	if (static_branch_unlikely(&trace_marker_exports_enabled))
6716 		ftrace_exports(event, TRACE_EXPORT_MARKER);
6717 	__buffer_unlock_commit(buffer, event);
6718 
6719 	if (tt)
6720 		event_triggers_post_call(tr->trace_marker_file, tt);
6721 
6722 	if (written > 0)
6723 		*fpos += written;
6724 
6725 	return written;
6726 }
6727 
6728 /* Limit it for now to 3K (including tag) */
6729 #define RAW_DATA_MAX_SIZE (1024*3)
6730 
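/*
 * Handle writes to the trace_marker_raw file: the payload must begin with
 * an unsigned int tag id and is recorded as a TRACE_RAW_DATA event.
 */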
6731 static ssize_t
6732 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6733 					size_t cnt, loff_t *fpos)
6734 {
6735 	struct trace_array *tr = filp->private_data;
6736 	struct ring_buffer_event *event;
6737 	struct trace_buffer *buffer;
6738 	struct raw_data_entry *entry;
6739 	unsigned long irq_flags;
6740 	ssize_t written;
6741 	int size;
6742 	int len;
6743 
6744 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6745 
6746 	if (tracing_disabled)
6747 		return -EINVAL;
6748 
6749 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6750 		return -EINVAL;
6751 
6752 	/* The marker must at least have a tag id */
6753 	if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6754 		return -EINVAL;
6755 
6756 	if (cnt > TRACE_BUF_SIZE)
6757 		cnt = TRACE_BUF_SIZE;
6758 
6759 	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6760 
6761 	local_save_flags(irq_flags);
6762 	size = sizeof(*entry) + cnt;
6763 	if (cnt < FAULT_SIZE_ID)
6764 		size += FAULT_SIZE_ID - cnt;
6765 
6766 	buffer = tr->array_buffer.buffer;
6767 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6768 					    irq_flags, preempt_count());
6769 	if (!event)
6770 		/* Ring buffer disabled, return as if not open for write */
6771 		return -EBADF;
6772 
6773 	entry = ring_buffer_event_data(event);
6774 
6775 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6776 	if (len) {
6777 		entry->id = -1;
6778 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6779 		written = -EFAULT;
6780 	} else
6781 		written = cnt;
6782 
6783 	__buffer_unlock_commit(buffer, event);
6784 
6785 	if (written > 0)
6786 		*fpos += written;
6787 
6788 	return written;
6789 }
6790 
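/*
 * List the available trace clocks; the currently selected clock is shown
 * in [brackets].
 */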
6791 static int tracing_clock_show(struct seq_file *m, void *v)
6792 {
6793 	struct trace_array *tr = m->private;
6794 	int i;
6795 
6796 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6797 		seq_printf(m,
6798 			"%s%s%s%s", i ? " " : "",
6799 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6800 			i == tr->clock_id ? "]" : "");
6801 	seq_putc(m, '\n');
6802 
6803 	return 0;
6804 }
6805 
6806 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6807 {
6808 	int i;
6809 
6810 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6811 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
6812 			break;
6813 	}
6814 	if (i == ARRAY_SIZE(trace_clocks))
6815 		return -EINVAL;
6816 
6817 	mutex_lock(&trace_types_lock);
6818 
6819 	tr->clock_id = i;
6820 
6821 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
6822 
6823 	/*
6824 	 * New clock may not be consistent with the previous clock.
6825 	 * Reset the buffer so that it doesn't have incomparable timestamps.
6826 	 */
6827 	tracing_reset_online_cpus(&tr->array_buffer);
6828 
6829 #ifdef CONFIG_TRACER_MAX_TRACE
6830 	if (tr->max_buffer.buffer)
6831 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6832 	tracing_reset_online_cpus(&tr->max_buffer);
6833 #endif
6834 
6835 	mutex_unlock(&trace_types_lock);
6836 
6837 	return 0;
6838 }
6839 
6840 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6841 				   size_t cnt, loff_t *fpos)
6842 {
6843 	struct seq_file *m = filp->private_data;
6844 	struct trace_array *tr = m->private;
6845 	char buf[64];
6846 	const char *clockstr;
6847 	int ret;
6848 
6849 	if (cnt >= sizeof(buf))
6850 		return -EINVAL;
6851 
6852 	if (copy_from_user(buf, ubuf, cnt))
6853 		return -EFAULT;
6854 
6855 	buf[cnt] = 0;
6856 
6857 	clockstr = strstrip(buf);
6858 
6859 	ret = tracing_set_clock(tr, clockstr);
6860 	if (ret)
6861 		return ret;
6862 
6863 	*fpos += cnt;
6864 
6865 	return cnt;
6866 }
6867 
6868 static int tracing_clock_open(struct inode *inode, struct file *file)
6869 {
6870 	struct trace_array *tr = inode->i_private;
6871 	int ret;
6872 
6873 	ret = tracing_check_open_get_tr(tr);
6874 	if (ret)
6875 		return ret;
6876 
6877 	ret = single_open(file, tracing_clock_show, inode->i_private);
6878 	if (ret < 0)
6879 		trace_array_put(tr);
6880 
6881 	return ret;
6882 }
6883 
6884 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6885 {
6886 	struct trace_array *tr = m->private;
6887 
6888 	mutex_lock(&trace_types_lock);
6889 
6890 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6891 		seq_puts(m, "delta [absolute]\n");
6892 	else
6893 		seq_puts(m, "[delta] absolute\n");
6894 
6895 	mutex_unlock(&trace_types_lock);
6896 
6897 	return 0;
6898 }
6899 
6900 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6901 {
6902 	struct trace_array *tr = inode->i_private;
6903 	int ret;
6904 
6905 	ret = tracing_check_open_get_tr(tr);
6906 	if (ret)
6907 		return ret;
6908 
6909 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6910 	if (ret < 0)
6911 		trace_array_put(tr);
6912 
6913 	return ret;
6914 }
6915 
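/*
 * Switch the trace buffer (and the max buffer, if allocated) between delta
 * and absolute timestamps.  Requests for absolute mode are reference
 * counted via time_stamp_abs_ref, so delta mode is only restored when the
 * last user drops its request.
 */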
6916 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6917 {
6918 	int ret = 0;
6919 
6920 	mutex_lock(&trace_types_lock);
6921 
6922 	if (abs && tr->time_stamp_abs_ref++)
6923 		goto out;
6924 
6925 	if (!abs) {
6926 		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6927 			ret = -EINVAL;
6928 			goto out;
6929 		}
6930 
6931 		if (--tr->time_stamp_abs_ref)
6932 			goto out;
6933 	}
6934 
6935 	ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
6936 
6937 #ifdef CONFIG_TRACER_MAX_TRACE
6938 	if (tr->max_buffer.buffer)
6939 		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6940 #endif
6941  out:
6942 	mutex_unlock(&trace_types_lock);
6943 
6944 	return ret;
6945 }
6946 
6947 struct ftrace_buffer_info {
6948 	struct trace_iterator	iter;
6949 	void			*spare;
6950 	unsigned int		spare_cpu;
6951 	unsigned int		read;
6952 };
6953 
6954 #ifdef CONFIG_TRACER_SNAPSHOT
6955 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6956 {
6957 	struct trace_array *tr = inode->i_private;
6958 	struct trace_iterator *iter;
6959 	struct seq_file *m;
6960 	int ret;
6961 
6962 	ret = tracing_check_open_get_tr(tr);
6963 	if (ret)
6964 		return ret;
6965 
6966 	if (file->f_mode & FMODE_READ) {
6967 		iter = __tracing_open(inode, file, true);
6968 		if (IS_ERR(iter))
6969 			ret = PTR_ERR(iter);
6970 	} else {
6971 		/* Writes still need the seq_file to hold the private data */
6972 		ret = -ENOMEM;
6973 		m = kzalloc(sizeof(*m), GFP_KERNEL);
6974 		if (!m)
6975 			goto out;
6976 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6977 		if (!iter) {
6978 			kfree(m);
6979 			goto out;
6980 		}
6981 		ret = 0;
6982 
6983 		iter->tr = tr;
6984 		iter->array_buffer = &tr->max_buffer;
6985 		iter->cpu_file = tracing_get_cpu(inode);
6986 		m->private = iter;
6987 		file->private_data = m;
6988 	}
6989 out:
6990 	if (ret < 0)
6991 		trace_array_put(tr);
6992 
6993 	return ret;
6994 }
6995 
6996 static ssize_t
6997 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6998 		       loff_t *ppos)
6999 {
7000 	struct seq_file *m = filp->private_data;
7001 	struct trace_iterator *iter = m->private;
7002 	struct trace_array *tr = iter->tr;
7003 	unsigned long val;
7004 	int ret;
7005 
7006 	ret = tracing_update_buffers();
7007 	if (ret < 0)
7008 		return ret;
7009 
7010 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7011 	if (ret)
7012 		return ret;
7013 
7014 	mutex_lock(&trace_types_lock);
7015 
7016 	if (tr->current_trace->use_max_tr) {
7017 		ret = -EBUSY;
7018 		goto out;
7019 	}
7020 
7021 	arch_spin_lock(&tr->max_lock);
7022 	if (tr->cond_snapshot)
7023 		ret = -EBUSY;
7024 	arch_spin_unlock(&tr->max_lock);
7025 	if (ret)
7026 		goto out;
7027 
7028 	switch (val) {
7029 	case 0:
7030 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7031 			ret = -EINVAL;
7032 			break;
7033 		}
7034 		if (tr->allocated_snapshot)
7035 			free_snapshot(tr);
7036 		break;
7037 	case 1:
7038 /* Only allow per-cpu swap if the ring buffer supports it */
7039 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7040 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7041 			ret = -EINVAL;
7042 			break;
7043 		}
7044 #endif
7045 		if (tr->allocated_snapshot)
7046 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7047 					&tr->array_buffer, iter->cpu_file);
7048 		else
7049 			ret = tracing_alloc_snapshot_instance(tr);
7050 		if (ret < 0)
7051 			break;
7052 		local_irq_disable();
7053 		/* Now, we're going to swap */
7054 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7055 			update_max_tr(tr, current, smp_processor_id(), NULL);
7056 		else
7057 			update_max_tr_single(tr, current, iter->cpu_file);
7058 		local_irq_enable();
7059 		break;
7060 	default:
7061 		if (tr->allocated_snapshot) {
7062 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7063 				tracing_reset_online_cpus(&tr->max_buffer);
7064 			else
7065 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7066 		}
7067 		break;
7068 	}
7069 
7070 	if (ret >= 0) {
7071 		*ppos += cnt;
7072 		ret = cnt;
7073 	}
7074 out:
7075 	mutex_unlock(&trace_types_lock);
7076 	return ret;
7077 }
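
/*
 * Example: the switch above implements the documented "snapshot" file
 * semantics (default tracefs mount assumed):
 *
 *   # echo 1 > /sys/kernel/tracing/snapshot   # allocate if needed, then swap
 *   # cat /sys/kernel/tracing/snapshot        # read the snapshotted trace
 *   # echo 2 > /sys/kernel/tracing/snapshot   # clear contents, keep the buffer
 *   # echo 0 > /sys/kernel/tracing/snapshot   # free the snapshot buffer
 */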
7078 
7079 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7080 {
7081 	struct seq_file *m = file->private_data;
7082 	int ret;
7083 
7084 	ret = tracing_release(inode, file);
7085 
7086 	if (file->f_mode & FMODE_READ)
7087 		return ret;
7088 
7089 	/* If write only, the seq_file is just a stub */
7090 	if (m)
7091 		kfree(m->private);
7092 	kfree(m);
7093 
7094 	return 0;
7095 }
7096 
7097 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7098 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7099 				    size_t count, loff_t *ppos);
7100 static int tracing_buffers_release(struct inode *inode, struct file *file);
7101 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7102 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7103 
7104 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7105 {
7106 	struct ftrace_buffer_info *info;
7107 	int ret;
7108 
7109 	/* The following checks for tracefs lockdown */
7110 	ret = tracing_buffers_open(inode, filp);
7111 	if (ret < 0)
7112 		return ret;
7113 
7114 	info = filp->private_data;
7115 
7116 	if (info->iter.trace->use_max_tr) {
7117 		tracing_buffers_release(inode, filp);
7118 		return -EBUSY;
7119 	}
7120 
7121 	info->iter.snapshot = true;
7122 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7123 
7124 	return ret;
7125 }
7126 
7127 #endif /* CONFIG_TRACER_SNAPSHOT */
7128 
7129 
7130 static const struct file_operations tracing_thresh_fops = {
7131 	.open		= tracing_open_generic,
7132 	.read		= tracing_thresh_read,
7133 	.write		= tracing_thresh_write,
7134 	.llseek		= generic_file_llseek,
7135 };
7136 
7137 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7138 static const struct file_operations tracing_max_lat_fops = {
7139 	.open		= tracing_open_generic,
7140 	.read		= tracing_max_lat_read,
7141 	.write		= tracing_max_lat_write,
7142 	.llseek		= generic_file_llseek,
7143 };
7144 #endif
7145 
7146 static const struct file_operations set_tracer_fops = {
7147 	.open		= tracing_open_generic,
7148 	.read		= tracing_set_trace_read,
7149 	.write		= tracing_set_trace_write,
7150 	.llseek		= generic_file_llseek,
7151 };
7152 
7153 static const struct file_operations tracing_pipe_fops = {
7154 	.open		= tracing_open_pipe,
7155 	.poll		= tracing_poll_pipe,
7156 	.read		= tracing_read_pipe,
7157 	.splice_read	= tracing_splice_read_pipe,
7158 	.release	= tracing_release_pipe,
7159 	.llseek		= no_llseek,
7160 };
7161 
7162 static const struct file_operations tracing_entries_fops = {
7163 	.open		= tracing_open_generic_tr,
7164 	.read		= tracing_entries_read,
7165 	.write		= tracing_entries_write,
7166 	.llseek		= generic_file_llseek,
7167 	.release	= tracing_release_generic_tr,
7168 };
7169 
7170 static const struct file_operations tracing_total_entries_fops = {
7171 	.open		= tracing_open_generic_tr,
7172 	.read		= tracing_total_entries_read,
7173 	.llseek		= generic_file_llseek,
7174 	.release	= tracing_release_generic_tr,
7175 };
7176 
7177 static const struct file_operations tracing_free_buffer_fops = {
7178 	.open		= tracing_open_generic_tr,
7179 	.write		= tracing_free_buffer_write,
7180 	.release	= tracing_free_buffer_release,
7181 };
7182 
7183 static const struct file_operations tracing_mark_fops = {
7184 	.open		= tracing_open_generic_tr,
7185 	.write		= tracing_mark_write,
7186 	.llseek		= generic_file_llseek,
7187 	.release	= tracing_release_generic_tr,
7188 };
7189 
7190 static const struct file_operations tracing_mark_raw_fops = {
7191 	.open		= tracing_open_generic_tr,
7192 	.write		= tracing_mark_raw_write,
7193 	.llseek		= generic_file_llseek,
7194 	.release	= tracing_release_generic_tr,
7195 };
7196 
7197 static const struct file_operations trace_clock_fops = {
7198 	.open		= tracing_clock_open,
7199 	.read		= seq_read,
7200 	.llseek		= seq_lseek,
7201 	.release	= tracing_single_release_tr,
7202 	.write		= tracing_clock_write,
7203 };
7204 
7205 static const struct file_operations trace_time_stamp_mode_fops = {
7206 	.open		= tracing_time_stamp_mode_open,
7207 	.read		= seq_read,
7208 	.llseek		= seq_lseek,
7209 	.release	= tracing_single_release_tr,
7210 };
7211 
7212 #ifdef CONFIG_TRACER_SNAPSHOT
7213 static const struct file_operations snapshot_fops = {
7214 	.open		= tracing_snapshot_open,
7215 	.read		= seq_read,
7216 	.write		= tracing_snapshot_write,
7217 	.llseek		= tracing_lseek,
7218 	.release	= tracing_snapshot_release,
7219 };
7220 
7221 static const struct file_operations snapshot_raw_fops = {
7222 	.open		= snapshot_raw_open,
7223 	.read		= tracing_buffers_read,
7224 	.release	= tracing_buffers_release,
7225 	.splice_read	= tracing_buffers_splice_read,
7226 	.llseek		= no_llseek,
7227 };
7228 
7229 #endif /* CONFIG_TRACER_SNAPSHOT */
7230 
7231 #define TRACING_LOG_ERRS_MAX	8
7232 #define TRACING_LOG_LOC_MAX	128
7233 
7234 #define CMD_PREFIX "  Command: "
7235 
7236 struct err_info {
7237 	const char	**errs;	/* ptr to loc-specific array of err strings */
7238 	u8		type;	/* index into errs -> specific err string */
7239 	u8		pos;	/* caret position in cmd (MAX_FILTER_STR_VAL = 256) */
7240 	u64		ts;
7241 };
7242 
7243 struct tracing_log_err {
7244 	struct list_head	list;
7245 	struct err_info		info;
7246 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7247 	char			cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7248 };
7249 
7250 static DEFINE_MUTEX(tracing_err_log_lock);
7251 
7252 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7253 {
7254 	struct tracing_log_err *err;
7255 
7256 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7257 		err = kzalloc(sizeof(*err), GFP_KERNEL);
7258 		if (!err)
7259 			err = ERR_PTR(-ENOMEM);
7260 		else
7261 			tr->n_err_log_entries++;
7262 
7263 		return err;
7264 	}
7265 
7266 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7267 	list_del(&err->list);
7268 
7269 	return err;
7270 }
7271 
7272 /**
7273  * err_pos - find the position of a string within a command for error careting
7274  * @cmd: The tracing command that caused the error
7275  * @str: The string to position the caret at within @cmd
7276  *
7277  * Finds the position of the first occurrence of @str within @cmd.  The
7278  * return value can be passed to tracing_log_err() for caret placement
7279  * within @cmd.
7280  *
7281  * Returns the index within @cmd of the first occurrence of @str or 0
7282  * if @str was not found.
7283  */
7284 unsigned int err_pos(char *cmd, const char *str)
7285 {
7286 	char *found;
7287 
7288 	if (WARN_ON(!strlen(cmd)))
7289 		return 0;
7290 
7291 	found = strstr(cmd, str);
7292 	if (found)
7293 		return found - cmd;
7294 
7295 	return 0;
7296 }
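
/*
 * Worked example: err_pos("hist:keys=bad", "bad") returns 10, the offset
 * of "bad" within the command, which tracing_log_err() can use to place
 * the caret under the offending token; if the substring is not found,
 * 0 is returned and the caret lands at the start of the command.
 */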
7297 
7298 /**
7299  * tracing_log_err - write an error to the tracing error log
7300  * @tr: The associated trace array for the error (NULL for top level array)
7301  * @loc: A string describing where the error occurred
7302  * @cmd: The tracing command that caused the error
7303  * @errs: The array of loc-specific static error strings
7304  * @type: The index into errs[], which produces the specific static err string
7305  * @pos: The position the caret should be placed in the cmd
7306  *
7307  * Writes an error into tracing/error_log of the form:
7308  *
7309  * <loc>: error: <text>
7310  *   Command: <cmd>
7311  *              ^
7312  *
7313  * tracing/error_log is a small log file containing the last
7314  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7315  * unless there has been a tracing error, and the error log can be
7316  * cleared and have its memory freed by writing the empty string in
7317  * truncation mode to it i.e. echo > tracing/error_log.
7318  *
7319  * NOTE: the @errs array along with the @type param are used to
7320  * produce a static error string - this string is not copied and saved
7321  * when the error is logged - only a pointer to it is saved.  See
7322  * existing callers for examples of how static strings are typically
7323  * defined for use with tracing_log_err().
7324  */
7325 void tracing_log_err(struct trace_array *tr,
7326 		     const char *loc, const char *cmd,
7327 		     const char **errs, u8 type, u8 pos)
7328 {
7329 	struct tracing_log_err *err;
7330 
7331 	if (!tr)
7332 		tr = &global_trace;
7333 
7334 	mutex_lock(&tracing_err_log_lock);
7335 	err = get_tracing_log_err(tr);
7336 	if (PTR_ERR(err) == -ENOMEM) {
7337 		mutex_unlock(&tracing_err_log_lock);
7338 		return;
7339 	}
7340 
7341 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7342 	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7343 
7344 	err->info.errs = errs;
7345 	err->info.type = type;
7346 	err->info.pos = pos;
7347 	err->info.ts = local_clock();
7348 
7349 	list_add_tail(&err->list, &tr->err_log);
7350 	mutex_unlock(&tracing_err_log_lock);
7351 }
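
/*
 * A minimal, hypothetical caller (the error table and the "hist" and
 * "badkey" strings below are only illustrative, not taken from this
 * file):
 *
 *   static const char *my_errs[] = { "Invalid key", "Duplicate field" };
 *
 *   tracing_log_err(tr, "hist", cmd, my_errs,
 *                   0, err_pos(cmd, "badkey"));
 *
 * This logs "hist: error: Invalid key" to tracing/error_log, with the
 * caret placed under "badkey" in the echoed command.
 */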
7352 
7353 static void clear_tracing_err_log(struct trace_array *tr)
7354 {
7355 	struct tracing_log_err *err, *next;
7356 
7357 	mutex_lock(&tracing_err_log_lock);
7358 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7359 		list_del(&err->list);
7360 		kfree(err);
7361 	}
7362 
7363 	tr->n_err_log_entries = 0;
7364 	mutex_unlock(&tracing_err_log_lock);
7365 }
7366 
7367 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7368 {
7369 	struct trace_array *tr = m->private;
7370 
7371 	mutex_lock(&tracing_err_log_lock);
7372 
7373 	return seq_list_start(&tr->err_log, *pos);
7374 }
7375 
7376 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7377 {
7378 	struct trace_array *tr = m->private;
7379 
7380 	return seq_list_next(v, &tr->err_log, pos);
7381 }
7382 
7383 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7384 {
7385 	mutex_unlock(&tracing_err_log_lock);
7386 }
7387 
7388 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7389 {
7390 	u8 i;
7391 
7392 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7393 		seq_putc(m, ' ');
7394 	for (i = 0; i < pos; i++)
7395 		seq_putc(m, ' ');
7396 	seq_puts(m, "^\n");
7397 }
7398 
7399 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7400 {
7401 	struct tracing_log_err *err = v;
7402 
7403 	if (err) {
7404 		const char *err_text = err->info.errs[err->info.type];
7405 		u64 sec = err->info.ts;
7406 		u32 nsec;
7407 
7408 		nsec = do_div(sec, NSEC_PER_SEC);
7409 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7410 			   err->loc, err_text);
7411 		seq_printf(m, "%s", err->cmd);
7412 		tracing_err_log_show_pos(m, err->info.pos);
7413 	}
7414 
7415 	return 0;
7416 }
7417 
7418 static const struct seq_operations tracing_err_log_seq_ops = {
7419 	.start  = tracing_err_log_seq_start,
7420 	.next   = tracing_err_log_seq_next,
7421 	.stop   = tracing_err_log_seq_stop,
7422 	.show   = tracing_err_log_seq_show
7423 };
7424 
7425 static int tracing_err_log_open(struct inode *inode, struct file *file)
7426 {
7427 	struct trace_array *tr = inode->i_private;
7428 	int ret = 0;
7429 
7430 	ret = tracing_check_open_get_tr(tr);
7431 	if (ret)
7432 		return ret;
7433 
7434 	/* If this file was opened for write, then erase contents */
7435 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7436 		clear_tracing_err_log(tr);
7437 
7438 	if (file->f_mode & FMODE_READ) {
7439 		ret = seq_open(file, &tracing_err_log_seq_ops);
7440 		if (!ret) {
7441 			struct seq_file *m = file->private_data;
7442 			m->private = tr;
7443 		} else {
7444 			trace_array_put(tr);
7445 		}
7446 	}
7447 	return ret;
7448 }
7449 
7450 static ssize_t tracing_err_log_write(struct file *file,
7451 				     const char __user *buffer,
7452 				     size_t count, loff_t *ppos)
7453 {
7454 	return count;
7455 }
7456 
7457 static int tracing_err_log_release(struct inode *inode, struct file *file)
7458 {
7459 	struct trace_array *tr = inode->i_private;
7460 
7461 	trace_array_put(tr);
7462 
7463 	if (file->f_mode & FMODE_READ)
7464 		seq_release(inode, file);
7465 
7466 	return 0;
7467 }
7468 
7469 static const struct file_operations tracing_err_log_fops = {
7470 	.open           = tracing_err_log_open,
7471 	.write		= tracing_err_log_write,
7472 	.read           = seq_read,
7473 	.llseek         = seq_lseek,
7474 	.release        = tracing_err_log_release,
7475 };
7476 
7477 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7478 {
7479 	struct trace_array *tr = inode->i_private;
7480 	struct ftrace_buffer_info *info;
7481 	int ret;
7482 
7483 	ret = tracing_check_open_get_tr(tr);
7484 	if (ret)
7485 		return ret;
7486 
7487 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7488 	if (!info) {
7489 		trace_array_put(tr);
7490 		return -ENOMEM;
7491 	}
7492 
7493 	mutex_lock(&trace_types_lock);
7494 
7495 	info->iter.tr		= tr;
7496 	info->iter.cpu_file	= tracing_get_cpu(inode);
7497 	info->iter.trace	= tr->current_trace;
7498 	info->iter.array_buffer = &tr->array_buffer;
7499 	info->spare		= NULL;
7500 	/* Force reading ring buffer for first read */
7501 	info->read		= (unsigned int)-1;
7502 
7503 	filp->private_data = info;
7504 
7505 	tr->trace_ref++;
7506 
7507 	mutex_unlock(&trace_types_lock);
7508 
7509 	ret = nonseekable_open(inode, filp);
7510 	if (ret < 0)
7511 		trace_array_put(tr);
7512 
7513 	return ret;
7514 }
7515 
7516 static __poll_t
7517 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7518 {
7519 	struct ftrace_buffer_info *info = filp->private_data;
7520 	struct trace_iterator *iter = &info->iter;
7521 
7522 	return trace_poll(iter, filp, poll_table);
7523 }
7524 
7525 static ssize_t
7526 tracing_buffers_read(struct file *filp, char __user *ubuf,
7527 		     size_t count, loff_t *ppos)
7528 {
7529 	struct ftrace_buffer_info *info = filp->private_data;
7530 	struct trace_iterator *iter = &info->iter;
7531 	ssize_t ret = 0;
7532 	ssize_t size;
7533 
7534 	if (!count)
7535 		return 0;
7536 
7537 #ifdef CONFIG_TRACER_MAX_TRACE
7538 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7539 		return -EBUSY;
7540 #endif
7541 
7542 	if (!info->spare) {
7543 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7544 							  iter->cpu_file);
7545 		if (IS_ERR(info->spare)) {
7546 			ret = PTR_ERR(info->spare);
7547 			info->spare = NULL;
7548 		} else {
7549 			info->spare_cpu = iter->cpu_file;
7550 		}
7551 	}
7552 	if (!info->spare)
7553 		return ret;
7554 
7555 	/* Do we have previous read data to read? */
7556 	if (info->read < PAGE_SIZE)
7557 		goto read;
7558 
7559  again:
7560 	trace_access_lock(iter->cpu_file);
7561 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7562 				    &info->spare,
7563 				    count,
7564 				    iter->cpu_file, 0);
7565 	trace_access_unlock(iter->cpu_file);
7566 
7567 	if (ret < 0) {
7568 		if (trace_empty(iter)) {
7569 			if ((filp->f_flags & O_NONBLOCK))
7570 				return -EAGAIN;
7571 
7572 			ret = wait_on_pipe(iter, 0);
7573 			if (ret)
7574 				return ret;
7575 
7576 			goto again;
7577 		}
7578 		return 0;
7579 	}
7580 
7581 	info->read = 0;
7582  read:
7583 	size = PAGE_SIZE - info->read;
7584 	if (size > count)
7585 		size = count;
7586 
7587 	ret = copy_to_user(ubuf, info->spare + info->read, size);
7588 	if (ret == size)
7589 		return -EFAULT;
7590 
7591 	size -= ret;
7592 
7593 	*ppos += size;
7594 	info->read += size;
7595 
7596 	return size;
7597 }
7598 
7599 static int tracing_buffers_release(struct inode *inode, struct file *file)
7600 {
7601 	struct ftrace_buffer_info *info = file->private_data;
7602 	struct trace_iterator *iter = &info->iter;
7603 
7604 	mutex_lock(&trace_types_lock);
7605 
7606 	iter->tr->trace_ref--;
7607 
7608 	__trace_array_put(iter->tr);
7609 
7610 	if (info->spare)
7611 		ring_buffer_free_read_page(iter->array_buffer->buffer,
7612 					   info->spare_cpu, info->spare);
7613 	kvfree(info);
7614 
7615 	mutex_unlock(&trace_types_lock);
7616 
7617 	return 0;
7618 }
7619 
7620 struct buffer_ref {
7621 	struct trace_buffer	*buffer;
7622 	void			*page;
7623 	int			cpu;
7624 	refcount_t		refcount;
7625 };
7626 
7627 static void buffer_ref_release(struct buffer_ref *ref)
7628 {
7629 	if (!refcount_dec_and_test(&ref->refcount))
7630 		return;
7631 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7632 	kfree(ref);
7633 }
7634 
7635 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7636 				    struct pipe_buffer *buf)
7637 {
7638 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7639 
7640 	buffer_ref_release(ref);
7641 	buf->private = 0;
7642 }
7643 
7644 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7645 				struct pipe_buffer *buf)
7646 {
7647 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7648 
7649 	if (refcount_read(&ref->refcount) > INT_MAX/2)
7650 		return false;
7651 
7652 	refcount_inc(&ref->refcount);
7653 	return true;
7654 }
7655 
7656 /* Pipe buffer operations for a buffer. */
7657 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7658 	.release		= buffer_pipe_buf_release,
7659 	.get			= buffer_pipe_buf_get,
7660 };
7661 
7662 /*
7663  * Callback from splice_to_pipe(), used to release any pages left in
7664  * the spd if we errored out while filling the pipe.
7665  */
7666 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7667 {
7668 	struct buffer_ref *ref =
7669 		(struct buffer_ref *)spd->partial[i].private;
7670 
7671 	buffer_ref_release(ref);
7672 	spd->partial[i].private = 0;
7673 }
7674 
7675 static ssize_t
7676 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7677 			    struct pipe_inode_info *pipe, size_t len,
7678 			    unsigned int flags)
7679 {
7680 	struct ftrace_buffer_info *info = file->private_data;
7681 	struct trace_iterator *iter = &info->iter;
7682 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7683 	struct page *pages_def[PIPE_DEF_BUFFERS];
7684 	struct splice_pipe_desc spd = {
7685 		.pages		= pages_def,
7686 		.partial	= partial_def,
7687 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7688 		.ops		= &buffer_pipe_buf_ops,
7689 		.spd_release	= buffer_spd_release,
7690 	};
7691 	struct buffer_ref *ref;
7692 	int entries, i;
7693 	ssize_t ret = 0;
7694 
7695 #ifdef CONFIG_TRACER_MAX_TRACE
7696 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7697 		return -EBUSY;
7698 #endif
7699 
7700 	if (*ppos & (PAGE_SIZE - 1))
7701 		return -EINVAL;
7702 
7703 	if (len & (PAGE_SIZE - 1)) {
7704 		if (len < PAGE_SIZE)
7705 			return -EINVAL;
7706 		len &= PAGE_MASK;
7707 	}
7708 
7709 	if (splice_grow_spd(pipe, &spd))
7710 		return -ENOMEM;
7711 
7712  again:
7713 	trace_access_lock(iter->cpu_file);
7714 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7715 
7716 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7717 		struct page *page;
7718 		int r;
7719 
7720 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7721 		if (!ref) {
7722 			ret = -ENOMEM;
7723 			break;
7724 		}
7725 
7726 		refcount_set(&ref->refcount, 1);
7727 		ref->buffer = iter->array_buffer->buffer;
7728 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7729 		if (IS_ERR(ref->page)) {
7730 			ret = PTR_ERR(ref->page);
7731 			ref->page = NULL;
7732 			kfree(ref);
7733 			break;
7734 		}
7735 		ref->cpu = iter->cpu_file;
7736 
7737 		r = ring_buffer_read_page(ref->buffer, &ref->page,
7738 					  len, iter->cpu_file, 1);
7739 		if (r < 0) {
7740 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
7741 						   ref->page);
7742 			kfree(ref);
7743 			break;
7744 		}
7745 
7746 		page = virt_to_page(ref->page);
7747 
7748 		spd.pages[i] = page;
7749 		spd.partial[i].len = PAGE_SIZE;
7750 		spd.partial[i].offset = 0;
7751 		spd.partial[i].private = (unsigned long)ref;
7752 		spd.nr_pages++;
7753 		*ppos += PAGE_SIZE;
7754 
7755 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
7756 	}
7757 
7758 	trace_access_unlock(iter->cpu_file);
7759 	spd.nr_pages = i;
7760 
7761 	/* did we read anything? */
7762 	if (!spd.nr_pages) {
7763 		if (ret)
7764 			goto out;
7765 
7766 		ret = -EAGAIN;
7767 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7768 			goto out;
7769 
7770 		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7771 		if (ret)
7772 			goto out;
7773 
7774 		goto again;
7775 	}
7776 
7777 	ret = splice_to_pipe(pipe, &spd);
7778 out:
7779 	splice_shrink_spd(&spd);
7780 
7781 	return ret;
7782 }
7783 
7784 static const struct file_operations tracing_buffers_fops = {
7785 	.open		= tracing_buffers_open,
7786 	.read		= tracing_buffers_read,
7787 	.poll		= tracing_buffers_poll,
7788 	.release	= tracing_buffers_release,
7789 	.splice_read	= tracing_buffers_splice_read,
7790 	.llseek		= no_llseek,
7791 };
7792 
7793 static ssize_t
7794 tracing_stats_read(struct file *filp, char __user *ubuf,
7795 		   size_t count, loff_t *ppos)
7796 {
7797 	struct inode *inode = file_inode(filp);
7798 	struct trace_array *tr = inode->i_private;
7799 	struct array_buffer *trace_buf = &tr->array_buffer;
7800 	int cpu = tracing_get_cpu(inode);
7801 	struct trace_seq *s;
7802 	unsigned long cnt;
7803 	unsigned long long t;
7804 	unsigned long usec_rem;
7805 
7806 	s = kmalloc(sizeof(*s), GFP_KERNEL);
7807 	if (!s)
7808 		return -ENOMEM;
7809 
7810 	trace_seq_init(s);
7811 
7812 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7813 	trace_seq_printf(s, "entries: %ld\n", cnt);
7814 
7815 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7816 	trace_seq_printf(s, "overrun: %ld\n", cnt);
7817 
7818 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7819 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7820 
7821 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7822 	trace_seq_printf(s, "bytes: %ld\n", cnt);
7823 
7824 	if (trace_clocks[tr->clock_id].in_ns) {
7825 		/* local or global for trace_clock */
7826 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7827 		usec_rem = do_div(t, USEC_PER_SEC);
7828 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7829 								t, usec_rem);
7830 
7831 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7832 		usec_rem = do_div(t, USEC_PER_SEC);
7833 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7834 	} else {
7835 		/* counter or tsc mode for trace_clock */
7836 		trace_seq_printf(s, "oldest event ts: %llu\n",
7837 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7838 
7839 		trace_seq_printf(s, "now ts: %llu\n",
7840 				ring_buffer_time_stamp(trace_buf->buffer, cpu));
7841 	}
7842 
7843 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7844 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
7845 
7846 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7847 	trace_seq_printf(s, "read events: %ld\n", cnt);
7848 
7849 	count = simple_read_from_buffer(ubuf, count, ppos,
7850 					s->buffer, trace_seq_used(s));
7851 
7852 	kfree(s);
7853 
7854 	return count;
7855 }
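
/*
 * Example output as assembled by the trace_seq_printf() calls above,
 * read from per_cpu/cpuN/stats under the tracefs mount (all values
 * illustrative):
 *
 *   entries: 129
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 5472
 *   oldest event ts:    42.034567
 *   now ts:    42.154321
 *   dropped events: 0
 *   read events: 129
 */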
7856 
7857 static const struct file_operations tracing_stats_fops = {
7858 	.open		= tracing_open_generic_tr,
7859 	.read		= tracing_stats_read,
7860 	.llseek		= generic_file_llseek,
7861 	.release	= tracing_release_generic_tr,
7862 };
7863 
7864 #ifdef CONFIG_DYNAMIC_FTRACE
7865 
7866 static ssize_t
7867 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7868 		  size_t cnt, loff_t *ppos)
7869 {
7870 	ssize_t ret;
7871 	char *buf;
7872 	int r;
7873 
7874 	/* 256 should be plenty to hold the amount needed */
7875 	buf = kmalloc(256, GFP_KERNEL);
7876 	if (!buf)
7877 		return -ENOMEM;
7878 
7879 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7880 		      ftrace_update_tot_cnt,
7881 		      ftrace_number_of_pages,
7882 		      ftrace_number_of_groups);
7883 
7884 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7885 	kfree(buf);
7886 	return ret;
7887 }
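
/*
 * Example read of the file this handler is wired to (dyn_ftrace_total_info
 * in mainline kernels); the numbers are illustrative and follow the
 * scnprintf() format above:
 *
 *   # cat /sys/kernel/tracing/dyn_ftrace_total_info
 *   45678 pages:312 groups: 26
 */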
7888 
7889 static const struct file_operations tracing_dyn_info_fops = {
7890 	.open		= tracing_open_generic,
7891 	.read		= tracing_read_dyn_info,
7892 	.llseek		= generic_file_llseek,
7893 };
7894 #endif /* CONFIG_DYNAMIC_FTRACE */
7895 
7896 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7897 static void
7898 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7899 		struct trace_array *tr, struct ftrace_probe_ops *ops,
7900 		void *data)
7901 {
7902 	tracing_snapshot_instance(tr);
7903 }
7904 
7905 static void
7906 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7907 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
7908 		      void *data)
7909 {
7910 	struct ftrace_func_mapper *mapper = data;
7911 	long *count = NULL;
7912 
7913 	if (mapper)
7914 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7915 
7916 	if (count) {
7917 
7918 		if (*count <= 0)
7919 			return;
7920 
7921 		(*count)--;
7922 	}
7923 
7924 	tracing_snapshot_instance(tr);
7925 }
7926 
7927 static int
7928 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7929 		      struct ftrace_probe_ops *ops, void *data)
7930 {
7931 	struct ftrace_func_mapper *mapper = data;
7932 	long *count = NULL;
7933 
7934 	seq_printf(m, "%ps:", (void *)ip);
7935 
7936 	seq_puts(m, "snapshot");
7937 
7938 	if (mapper)
7939 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7940 
7941 	if (count)
7942 		seq_printf(m, ":count=%ld\n", *count);
7943 	else
7944 		seq_puts(m, ":unlimited\n");
7945 
7946 	return 0;
7947 }
7948 
7949 static int
7950 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7951 		     unsigned long ip, void *init_data, void **data)
7952 {
7953 	struct ftrace_func_mapper *mapper = *data;
7954 
7955 	if (!mapper) {
7956 		mapper = allocate_ftrace_func_mapper();
7957 		if (!mapper)
7958 			return -ENOMEM;
7959 		*data = mapper;
7960 	}
7961 
7962 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7963 }
7964 
7965 static void
7966 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7967 		     unsigned long ip, void *data)
7968 {
7969 	struct ftrace_func_mapper *mapper = data;
7970 
7971 	if (!ip) {
7972 		if (!mapper)
7973 			return;
7974 		free_ftrace_func_mapper(mapper, NULL);
7975 		return;
7976 	}
7977 
7978 	ftrace_func_mapper_remove_ip(mapper, ip);
7979 }
7980 
7981 static struct ftrace_probe_ops snapshot_probe_ops = {
7982 	.func			= ftrace_snapshot,
7983 	.print			= ftrace_snapshot_print,
7984 };
7985 
7986 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7987 	.func			= ftrace_count_snapshot,
7988 	.print			= ftrace_snapshot_print,
7989 	.init			= ftrace_snapshot_init,
7990 	.free			= ftrace_snapshot_free,
7991 };
7992 
7993 static int
7994 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7995 			       char *glob, char *cmd, char *param, int enable)
7996 {
7997 	struct ftrace_probe_ops *ops;
7998 	void *count = (void *)-1;
7999 	char *number;
8000 	int ret;
8001 
8002 	if (!tr)
8003 		return -ENODEV;
8004 
8005 	/* hash funcs only work with set_ftrace_filter */
8006 	if (!enable)
8007 		return -EINVAL;
8008 
8009 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8010 
8011 	if (glob[0] == '!')
8012 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8013 
8014 	if (!param)
8015 		goto out_reg;
8016 
8017 	number = strsep(&param, ":");
8018 
8019 	if (!strlen(number))
8020 		goto out_reg;
8021 
8022 	/*
8023 	 * We use the callback data field (which is a pointer)
8024 	 * as our counter.
8025 	 */
8026 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8027 	if (ret)
8028 		return ret;
8029 
8030  out_reg:
8031 	ret = tracing_alloc_snapshot_instance(tr);
8032 	if (ret < 0)
8033 		goto out;
8034 
8035 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8036 
8037  out:
8038 	return ret < 0 ? ret : 0;
8039 }
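
/*
 * Example: this callback implements the "snapshot" command accepted by
 * set_ftrace_filter (default tracefs mount assumed; "schedule" is just
 * an example function):
 *
 *   # echo 'schedule:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 *   # echo 'schedule:snapshot:5' > /sys/kernel/tracing/set_ftrace_filter
 *   # echo '!schedule:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 *
 * The optional ":5" is parsed above into the probe's count, limiting how
 * many snapshots are taken; the leading '!' unregisters the probe.
 */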
8040 
8041 static struct ftrace_func_command ftrace_snapshot_cmd = {
8042 	.name			= "snapshot",
8043 	.func			= ftrace_trace_snapshot_callback,
8044 };
8045 
8046 static __init int register_snapshot_cmd(void)
8047 {
8048 	return register_ftrace_command(&ftrace_snapshot_cmd);
8049 }
8050 #else
8051 static inline __init int register_snapshot_cmd(void) { return 0; }
8052 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8053 
8054 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8055 {
8056 	if (WARN_ON(!tr->dir))
8057 		return ERR_PTR(-ENODEV);
8058 
8059 	/* Top directory uses NULL as the parent */
8060 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8061 		return NULL;
8062 
8063 	/* All sub buffers have a descriptor */
8064 	return tr->dir;
8065 }
8066 
8067 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8068 {
8069 	struct dentry *d_tracer;
8070 
8071 	if (tr->percpu_dir)
8072 		return tr->percpu_dir;
8073 
8074 	d_tracer = tracing_get_dentry(tr);
8075 	if (IS_ERR(d_tracer))
8076 		return NULL;
8077 
8078 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8079 
8080 	MEM_FAIL(!tr->percpu_dir,
8081 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8082 
8083 	return tr->percpu_dir;
8084 }
8085 
8086 static struct dentry *
8087 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8088 		      void *data, long cpu, const struct file_operations *fops)
8089 {
8090 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8091 
8092 	if (ret) /* See tracing_get_cpu() */
8093 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8094 	return ret;
8095 }
8096 
8097 static void
8098 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8099 {
8100 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8101 	struct dentry *d_cpu;
8102 	char cpu_dir[30]; /* 30 characters should be more than enough */
8103 
8104 	if (!d_percpu)
8105 		return;
8106 
8107 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8108 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8109 	if (!d_cpu) {
8110 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8111 		return;
8112 	}
8113 
8114 	/* per cpu trace_pipe */
8115 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8116 				tr, cpu, &tracing_pipe_fops);
8117 
8118 	/* per cpu trace */
8119 	trace_create_cpu_file("trace", 0644, d_cpu,
8120 				tr, cpu, &tracing_fops);
8121 
8122 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8123 				tr, cpu, &tracing_buffers_fops);
8124 
8125 	trace_create_cpu_file("stats", 0444, d_cpu,
8126 				tr, cpu, &tracing_stats_fops);
8127 
8128 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8129 				tr, cpu, &tracing_entries_fops);
8130 
8131 #ifdef CONFIG_TRACER_SNAPSHOT
8132 	trace_create_cpu_file("snapshot", 0644, d_cpu,
8133 				tr, cpu, &snapshot_fops);
8134 
8135 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8136 				tr, cpu, &snapshot_raw_fops);
8137 #endif
8138 }
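
/*
 * The per-CPU directory layout created above, e.g. for cpu0 (the two
 * snapshot files exist only with CONFIG_TRACER_SNAPSHOT):
 *
 *   per_cpu/cpu0/trace_pipe
 *   per_cpu/cpu0/trace
 *   per_cpu/cpu0/trace_pipe_raw
 *   per_cpu/cpu0/stats
 *   per_cpu/cpu0/buffer_size_kb
 *   per_cpu/cpu0/snapshot
 *   per_cpu/cpu0/snapshot_raw
 */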
8139 
8140 #ifdef CONFIG_FTRACE_SELFTEST
8141 /* Let selftest have access to static functions in this file */
8142 #include "trace_selftest.c"
8143 #endif
8144 
8145 static ssize_t
8146 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8147 			loff_t *ppos)
8148 {
8149 	struct trace_option_dentry *topt = filp->private_data;
8150 	char *buf;
8151 
8152 	if (topt->flags->val & topt->opt->bit)
8153 		buf = "1\n";
8154 	else
8155 		buf = "0\n";
8156 
8157 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8158 }
8159 
8160 static ssize_t
8161 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8162 			 loff_t *ppos)
8163 {
8164 	struct trace_option_dentry *topt = filp->private_data;
8165 	unsigned long val;
8166 	int ret;
8167 
8168 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8169 	if (ret)
8170 		return ret;
8171 
8172 	if (val != 0 && val != 1)
8173 		return -EINVAL;
8174 
8175 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8176 		mutex_lock(&trace_types_lock);
8177 		ret = __set_tracer_option(topt->tr, topt->flags,
8178 					  topt->opt, !val);
8179 		mutex_unlock(&trace_types_lock);
8180 		if (ret)
8181 			return ret;
8182 	}
8183 
8184 	*ppos += cnt;
8185 
8186 	return cnt;
8187 }
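
/*
 * Example: the files under options/ accept only "0" or "1".  Tracer-
 * specific options (handled here) and core flags (handled by
 * trace_options_core_write() below) look the same from user space;
 * funcgraph-proc is one of the function_graph tracer's options, and the
 * set of files present depends on which tracers are built in:
 *
 *   # echo 1 > /sys/kernel/tracing/options/funcgraph-proc
 *   # echo 0 > /sys/kernel/tracing/options/funcgraph-proc
 */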
8188 
8189 
8190 static const struct file_operations trace_options_fops = {
8191 	.open = tracing_open_generic,
8192 	.read = trace_options_read,
8193 	.write = trace_options_write,
8194 	.llseek	= generic_file_llseek,
8195 };
8196 
8197 /*
8198  * In order to pass in both the trace_array descriptor as well as the index
8199  * to the flag that the trace option file represents, the trace_array
8200  * has a character array of trace_flags_index[], which holds the index
8201  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8202  * The address of this character array is passed to the flag option file
8203  * read/write callbacks.
8204  *
8205  * In order to extract both the index and the trace_array descriptor,
8206  * get_tr_index() uses the following algorithm.
8207  *
8208  *   idx = *ptr;
8209  *
8210  * As the pointer itself is the address of index[idx] (remember that
8211  * index[idx] == idx), dereferencing it yields the index directly.
8212  *
8213  * Then, to get the trace_array descriptor, subtract that index from
8214  * the pointer to reach the start of the index array:
8215  *
8216  *   ptr - idx == &index[0]
8217  *
8218  * Then a simple container_of() from that pointer gets us to the
8219  * trace_array descriptor.
8220  */
8221 static void get_tr_index(void *data, struct trace_array **ptr,
8222 			 unsigned int *pindex)
8223 {
8224 	*pindex = *(unsigned char *)data;
8225 
8226 	*ptr = container_of(data - *pindex, struct trace_array,
8227 			    trace_flags_index);
8228 }
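
/*
 * Worked example: if data == &tr->trace_flags_index[3], then *pindex is
 * read as 3 (because trace_flags_index[3] == 3), data - 3 points back at
 * &tr->trace_flags_index[0], and container_of() recovers tr itself.
 */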
8229 
8230 static ssize_t
8231 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8232 			loff_t *ppos)
8233 {
8234 	void *tr_index = filp->private_data;
8235 	struct trace_array *tr;
8236 	unsigned int index;
8237 	char *buf;
8238 
8239 	get_tr_index(tr_index, &tr, &index);
8240 
8241 	if (tr->trace_flags & (1 << index))
8242 		buf = "1\n";
8243 	else
8244 		buf = "0\n";
8245 
8246 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8247 }
8248 
8249 static ssize_t
8250 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8251 			 loff_t *ppos)
8252 {
8253 	void *tr_index = filp->private_data;
8254 	struct trace_array *tr;
8255 	unsigned int index;
8256 	unsigned long val;
8257 	int ret;
8258 
8259 	get_tr_index(tr_index, &tr, &index);
8260 
8261 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8262 	if (ret)
8263 		return ret;
8264 
8265 	if (val != 0 && val != 1)
8266 		return -EINVAL;
8267 
8268 	mutex_lock(&event_mutex);
8269 	mutex_lock(&trace_types_lock);
8270 	ret = set_tracer_flag(tr, 1 << index, val);
8271 	mutex_unlock(&trace_types_lock);
8272 	mutex_unlock(&event_mutex);
8273 
8274 	if (ret < 0)
8275 		return ret;
8276 
8277 	*ppos += cnt;
8278 
8279 	return cnt;
8280 }
8281 
8282 static const struct file_operations trace_options_core_fops = {
8283 	.open = tracing_open_generic,
8284 	.read = trace_options_core_read,
8285 	.write = trace_options_core_write,
8286 	.llseek = generic_file_llseek,
8287 };
8288 
8289 struct dentry *trace_create_file(const char *name,
8290 				 umode_t mode,
8291 				 struct dentry *parent,
8292 				 void *data,
8293 				 const struct file_operations *fops)
8294 {
8295 	struct dentry *ret;
8296 
8297 	ret = tracefs_create_file(name, mode, parent, data, fops);
8298 	if (!ret)
8299 		pr_warn("Could not create tracefs '%s' entry\n", name);
8300 
8301 	return ret;
8302 }
8303 
8304 
8305 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8306 {
8307 	struct dentry *d_tracer;
8308 
8309 	if (tr->options)
8310 		return tr->options;
8311 
8312 	d_tracer = tracing_get_dentry(tr);
8313 	if (IS_ERR(d_tracer))
8314 		return NULL;
8315 
8316 	tr->options = tracefs_create_dir("options", d_tracer);
8317 	if (!tr->options) {
8318 		pr_warn("Could not create tracefs directory 'options'\n");
8319 		return NULL;
8320 	}
8321 
8322 	return tr->options;
8323 }
8324 
8325 static void
8326 create_trace_option_file(struct trace_array *tr,
8327 			 struct trace_option_dentry *topt,
8328 			 struct tracer_flags *flags,
8329 			 struct tracer_opt *opt)
8330 {
8331 	struct dentry *t_options;
8332 
8333 	t_options = trace_options_init_dentry(tr);
8334 	if (!t_options)
8335 		return;
8336 
8337 	topt->flags = flags;
8338 	topt->opt = opt;
8339 	topt->tr = tr;
8340 
8341 	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8342 				    &trace_options_fops);
8343 
8344 }
8345 
8346 static void
8347 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8348 {
8349 	struct trace_option_dentry *topts;
8350 	struct trace_options *tr_topts;
8351 	struct tracer_flags *flags;
8352 	struct tracer_opt *opts;
8353 	int cnt;
8354 	int i;
8355 
8356 	if (!tracer)
8357 		return;
8358 
8359 	flags = tracer->flags;
8360 
8361 	if (!flags || !flags->opts)
8362 		return;
8363 
8364 	/*
8365 	 * If this is an instance, only create flags for tracers
8366 	 * the instance may have.
8367 	 */
8368 	if (!trace_ok_for_array(tracer, tr))
8369 		return;
8370 
8371 	for (i = 0; i < tr->nr_topts; i++) {
8372 		/* Make sure there are no duplicate flags. */
8373 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8374 			return;
8375 	}
8376 
8377 	opts = flags->opts;
8378 
8379 	for (cnt = 0; opts[cnt].name; cnt++)
8380 		;
8381 
8382 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8383 	if (!topts)
8384 		return;
8385 
8386 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8387 			    GFP_KERNEL);
8388 	if (!tr_topts) {
8389 		kfree(topts);
8390 		return;
8391 	}
8392 
8393 	tr->topts = tr_topts;
8394 	tr->topts[tr->nr_topts].tracer = tracer;
8395 	tr->topts[tr->nr_topts].topts = topts;
8396 	tr->nr_topts++;
8397 
8398 	for (cnt = 0; opts[cnt].name; cnt++) {
8399 		create_trace_option_file(tr, &topts[cnt], flags,
8400 					 &opts[cnt]);
8401 		MEM_FAIL(topts[cnt].entry == NULL,
8402 			  "Failed to create trace option: %s",
8403 			  opts[cnt].name);
8404 	}
8405 }
8406 
8407 static struct dentry *
8408 create_trace_option_core_file(struct trace_array *tr,
8409 			      const char *option, long index)
8410 {
8411 	struct dentry *t_options;
8412 
8413 	t_options = trace_options_init_dentry(tr);
8414 	if (!t_options)
8415 		return NULL;
8416 
8417 	return trace_create_file(option, 0644, t_options,
8418 				 (void *)&tr->trace_flags_index[index],
8419 				 &trace_options_core_fops);
8420 }
8421 
8422 static void create_trace_options_dir(struct trace_array *tr)
8423 {
8424 	struct dentry *t_options;
8425 	bool top_level = tr == &global_trace;
8426 	int i;
8427 
8428 	t_options = trace_options_init_dentry(tr);
8429 	if (!t_options)
8430 		return;
8431 
8432 	for (i = 0; trace_options[i]; i++) {
8433 		if (top_level ||
8434 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8435 			create_trace_option_core_file(tr, trace_options[i], i);
8436 	}
8437 }
8438 
8439 static ssize_t
8440 rb_simple_read(struct file *filp, char __user *ubuf,
8441 	       size_t cnt, loff_t *ppos)
8442 {
8443 	struct trace_array *tr = filp->private_data;
8444 	char buf[64];
8445 	int r;
8446 
8447 	r = tracer_tracing_is_on(tr);
8448 	r = sprintf(buf, "%d\n", r);
8449 
8450 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8451 }
8452 
8453 static ssize_t
8454 rb_simple_write(struct file *filp, const char __user *ubuf,
8455 		size_t cnt, loff_t *ppos)
8456 {
8457 	struct trace_array *tr = filp->private_data;
8458 	struct trace_buffer *buffer = tr->array_buffer.buffer;
8459 	unsigned long val;
8460 	int ret;
8461 
8462 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8463 	if (ret)
8464 		return ret;
8465 
8466 	if (buffer) {
8467 		mutex_lock(&trace_types_lock);
8468 		if (!!val == tracer_tracing_is_on(tr)) {
8469 			val = 0; /* do nothing */
8470 		} else if (val) {
8471 			tracer_tracing_on(tr);
8472 			if (tr->current_trace->start)
8473 				tr->current_trace->start(tr);
8474 		} else {
8475 			tracer_tracing_off(tr);
8476 			if (tr->current_trace->stop)
8477 				tr->current_trace->stop(tr);
8478 		}
8479 		mutex_unlock(&trace_types_lock);
8480 	}
8481 
8482 	(*ppos)++;
8483 
8484 	return cnt;
8485 }
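
/*
 * Example: this read/write pair backs the "tracing_on" file (default
 * tracefs mount assumed):
 *
 *   # echo 0 > /sys/kernel/tracing/tracing_on   # stop writing to the ring buffer
 *   # echo 1 > /sys/kernel/tracing/tracing_on   # resume writing
 *   # cat /sys/kernel/tracing/tracing_on
 *   1
 */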
8486 
8487 static const struct file_operations rb_simple_fops = {
8488 	.open		= tracing_open_generic_tr,
8489 	.read		= rb_simple_read,
8490 	.write		= rb_simple_write,
8491 	.release	= tracing_release_generic_tr,
8492 	.llseek		= default_llseek,
8493 };
8494 
8495 static ssize_t
8496 buffer_percent_read(struct file *filp, char __user *ubuf,
8497 		    size_t cnt, loff_t *ppos)
8498 {
8499 	struct trace_array *tr = filp->private_data;
8500 	char buf[64];
8501 	int r;
8502 
8503 	r = tr->buffer_percent;
8504 	r = sprintf(buf, "%d\n", r);
8505 
8506 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8507 }
8508 
8509 static ssize_t
8510 buffer_percent_write(struct file *filp, const char __user *ubuf,
8511 		     size_t cnt, loff_t *ppos)
8512 {
8513 	struct trace_array *tr = filp->private_data;
8514 	unsigned long val;
8515 	int ret;
8516 
8517 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8518 	if (ret)
8519 		return ret;
8520 
8521 	if (val > 100)
8522 		return -EINVAL;
8523 
8524 	if (!val)
8525 		val = 1;
8526 
8527 	tr->buffer_percent = val;
8528 
8529 	(*ppos)++;
8530 
8531 	return cnt;
8532 }
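
/*
 * Example: "buffer_percent" sets how full the ring buffer must be before
 * blocked trace_pipe_raw readers are woken (it is passed to wait_on_pipe()
 * in tracing_buffers_splice_read() above):
 *
 *   # echo 50 > /sys/kernel/tracing/buffer_percent
 *
 * Values range from 0 to 100; this handler silently treats 0 as 1.
 */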
8533 
8534 static const struct file_operations buffer_percent_fops = {
8535 	.open		= tracing_open_generic_tr,
8536 	.read		= buffer_percent_read,
8537 	.write		= buffer_percent_write,
8538 	.release	= tracing_release_generic_tr,
8539 	.llseek		= default_llseek,
8540 };
8541 
8542 static struct dentry *trace_instance_dir;
8543 
8544 static void
8545 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8546 
8547 static int
8548 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8549 {
8550 	enum ring_buffer_flags rb_flags;
8551 
8552 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8553 
8554 	buf->tr = tr;
8555 
8556 	buf->buffer = ring_buffer_alloc(size, rb_flags);
8557 	if (!buf->buffer)
8558 		return -ENOMEM;
8559 
8560 	buf->data = alloc_percpu(struct trace_array_cpu);
8561 	if (!buf->data) {
8562 		ring_buffer_free(buf->buffer);
8563 		buf->buffer = NULL;
8564 		return -ENOMEM;
8565 	}
8566 
8567 	/* Allocate the first page for all buffers */
8568 	set_buffer_entries(&tr->array_buffer,
8569 			   ring_buffer_size(tr->array_buffer.buffer, 0));
8570 
8571 	return 0;
8572 }
8573 
8574 static int allocate_trace_buffers(struct trace_array *tr, int size)
8575 {
8576 	int ret;
8577 
8578 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8579 	if (ret)
8580 		return ret;
8581 
8582 #ifdef CONFIG_TRACER_MAX_TRACE
8583 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
8584 				    allocate_snapshot ? size : 1);
8585 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8586 		ring_buffer_free(tr->array_buffer.buffer);
8587 		tr->array_buffer.buffer = NULL;
8588 		free_percpu(tr->array_buffer.data);
8589 		tr->array_buffer.data = NULL;
8590 		return -ENOMEM;
8591 	}
8592 	tr->allocated_snapshot = allocate_snapshot;
8593 
8594 	/*
8595 	 * Only the top level trace array gets its snapshot allocated
8596 	 * from the kernel command line.
8597 	 */
8598 	allocate_snapshot = false;
8599 #endif
8600 
8601 	return 0;
8602 }
8603 
8604 static void free_trace_buffer(struct array_buffer *buf)
8605 {
8606 	if (buf->buffer) {
8607 		ring_buffer_free(buf->buffer);
8608 		buf->buffer = NULL;
8609 		free_percpu(buf->data);
8610 		buf->data = NULL;
8611 	}
8612 }
8613 
8614 static void free_trace_buffers(struct trace_array *tr)
8615 {
8616 	if (!tr)
8617 		return;
8618 
8619 	free_trace_buffer(&tr->array_buffer);
8620 
8621 #ifdef CONFIG_TRACER_MAX_TRACE
8622 	free_trace_buffer(&tr->max_buffer);
8623 #endif
8624 }
8625 
8626 static void init_trace_flags_index(struct trace_array *tr)
8627 {
8628 	int i;
8629 
8630 	/* Used by the trace options files */
8631 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8632 		tr->trace_flags_index[i] = i;
8633 }
8634 
8635 static void __update_tracer_options(struct trace_array *tr)
8636 {
8637 	struct tracer *t;
8638 
8639 	for (t = trace_types; t; t = t->next)
8640 		add_tracer_options(tr, t);
8641 }
8642 
8643 static void update_tracer_options(struct trace_array *tr)
8644 {
8645 	mutex_lock(&trace_types_lock);
8646 	__update_tracer_options(tr);
8647 	mutex_unlock(&trace_types_lock);
8648 }
8649 
8650 /* Must have trace_types_lock held */
8651 struct trace_array *trace_array_find(const char *instance)
8652 {
8653 	struct trace_array *tr, *found = NULL;
8654 
8655 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8656 		if (tr->name && strcmp(tr->name, instance) == 0) {
8657 			found = tr;
8658 			break;
8659 		}
8660 	}
8661 
8662 	return found;
8663 }
8664 
8665 struct trace_array *trace_array_find_get(const char *instance)
8666 {
8667 	struct trace_array *tr;
8668 
8669 	mutex_lock(&trace_types_lock);
8670 	tr = trace_array_find(instance);
8671 	if (tr)
8672 		tr->ref++;
8673 	mutex_unlock(&trace_types_lock);
8674 
8675 	return tr;
8676 }
8677 
8678 static int trace_array_create_dir(struct trace_array *tr)
8679 {
8680 	int ret;
8681 
8682 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8683 	if (!tr->dir)
8684 		return -EINVAL;
8685 
8686 	ret = event_trace_add_tracer(tr->dir, tr);
8687 	if (ret) {
8688 		tracefs_remove(tr->dir);
8689 		return ret;
8690 	}
8691 
8692 	init_tracer_tracefs(tr, tr->dir);
8693 	__update_tracer_options(tr);
8694 
8695 	return ret;
8696 }
8697 
8698 static struct trace_array *trace_array_create(const char *name)
8699 {
8700 	struct trace_array *tr;
8701 	int ret;
8702 
8703 	ret = -ENOMEM;
8704 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8705 	if (!tr)
8706 		return ERR_PTR(ret);
8707 
8708 	tr->name = kstrdup(name, GFP_KERNEL);
8709 	if (!tr->name)
8710 		goto out_free_tr;
8711 
8712 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8713 		goto out_free_tr;
8714 
8715 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8716 
8717 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8718 
8719 	raw_spin_lock_init(&tr->start_lock);
8720 
8721 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8722 
8723 	tr->current_trace = &nop_trace;
8724 
8725 	INIT_LIST_HEAD(&tr->systems);
8726 	INIT_LIST_HEAD(&tr->events);
8727 	INIT_LIST_HEAD(&tr->hist_vars);
8728 	INIT_LIST_HEAD(&tr->err_log);
8729 
8730 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8731 		goto out_free_tr;
8732 
8733 	if (ftrace_allocate_ftrace_ops(tr) < 0)
8734 		goto out_free_tr;
8735 
8736 	ftrace_init_trace_array(tr);
8737 
8738 	init_trace_flags_index(tr);
8739 
8740 	if (trace_instance_dir) {
8741 		ret = trace_array_create_dir(tr);
8742 		if (ret)
8743 			goto out_free_tr;
8744 	} else
8745 		__trace_early_add_events(tr);
8746 
8747 	list_add(&tr->list, &ftrace_trace_arrays);
8748 
8749 	tr->ref++;
8750 
8751 	return tr;
8752 
8753  out_free_tr:
8754 	ftrace_free_ftrace_ops(tr);
8755 	free_trace_buffers(tr);
8756 	free_cpumask_var(tr->tracing_cpumask);
8757 	kfree(tr->name);
8758 	kfree(tr);
8759 
8760 	return ERR_PTR(ret);
8761 }
8762 
8763 static int instance_mkdir(const char *name)
8764 {
8765 	struct trace_array *tr;
8766 	int ret;
8767 
8768 	mutex_lock(&event_mutex);
8769 	mutex_lock(&trace_types_lock);
8770 
8771 	ret = -EEXIST;
8772 	if (trace_array_find(name))
8773 		goto out_unlock;
8774 
8775 	tr = trace_array_create(name);
8776 
8777 	ret = PTR_ERR_OR_ZERO(tr);
8778 
8779 out_unlock:
8780 	mutex_unlock(&trace_types_lock);
8781 	mutex_unlock(&event_mutex);
8782 	return ret;
8783 }
8784 
8785 /**
8786  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8787  * @name: The name of the trace array to be looked up/created.
8788  *
8789  * Returns a pointer to the trace array with the given name, or
8790  * NULL if it cannot be created.
8791  *
8792  * NOTE: This function increments the reference counter associated with the
8793  * trace array returned. This makes sure it cannot be freed while in use.
8794  * Use trace_array_put() once the trace array is no longer needed.
8795  * If the trace_array is to be freed, trace_array_destroy() needs to
8796  * be called after the trace_array_put(), or simply let user space delete
8797  * it from the tracefs instances directory. But until the
8798  * trace_array_put() is called, user space cannot delete it.
8799  *
8800  */
8801 struct trace_array *trace_array_get_by_name(const char *name)
8802 {
8803 	struct trace_array *tr;
8804 
8805 	mutex_lock(&event_mutex);
8806 	mutex_lock(&trace_types_lock);
8807 
8808 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8809 		if (tr->name && strcmp(tr->name, name) == 0)
8810 			goto out_unlock;
8811 	}
8812 
8813 	tr = trace_array_create(name);
8814 
8815 	if (IS_ERR(tr))
8816 		tr = NULL;
8817 out_unlock:
8818 	if (tr)
8819 		tr->ref++;
8820 
8821 	mutex_unlock(&trace_types_lock);
8822 	mutex_unlock(&event_mutex);
8823 	return tr;
8824 }
8825 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
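
/*
 * Sketch of the module-side usage described in the comment above (error
 * handling trimmed; "myinst" is just an example instance name):
 *
 *   struct trace_array *tr;
 *
 *   tr = trace_array_get_by_name("myinst");
 *   if (!tr)
 *           return -ENOMEM;
 *   ...
 *   trace_array_put(tr);
 *   trace_array_destroy(tr);    // only if the instance should go away
 */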
8826 
8827 static int __remove_instance(struct trace_array *tr)
8828 {
8829 	int i;
8830 
8831 	/* Reference counter for a newly created trace array = 1. */
8832 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
8833 		return -EBUSY;
8834 
8835 	list_del(&tr->list);
8836 
8837 	/* Disable all the flags that were enabled coming in */
8838 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8839 		if ((1 << i) & ZEROED_TRACE_FLAGS)
8840 			set_tracer_flag(tr, 1 << i, 0);
8841 	}
8842 
8843 	tracing_set_nop(tr);
8844 	clear_ftrace_function_probes(tr);
8845 	event_trace_del_tracer(tr);
8846 	ftrace_clear_pids(tr);
8847 	ftrace_destroy_function_files(tr);
8848 	tracefs_remove(tr->dir);
8849 	free_trace_buffers(tr);
8850 
8851 	for (i = 0; i < tr->nr_topts; i++) {
8852 		kfree(tr->topts[i].topts);
8853 	}
8854 	kfree(tr->topts);
8855 
8856 	free_cpumask_var(tr->tracing_cpumask);
8857 	kfree(tr->name);
8858 	kfree(tr);
8859 
8860 	return 0;
8861 }
8862 
8863 int trace_array_destroy(struct trace_array *this_tr)
8864 {
8865 	struct trace_array *tr;
8866 	int ret;
8867 
8868 	if (!this_tr)
8869 		return -EINVAL;
8870 
8871 	mutex_lock(&event_mutex);
8872 	mutex_lock(&trace_types_lock);
8873 
8874 	ret = -ENODEV;
8875 
8876 	/* Make sure the trace array exists before destroying it. */
8877 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8878 		if (tr == this_tr) {
8879 			ret = __remove_instance(tr);
8880 			break;
8881 		}
8882 	}
8883 
8884 	mutex_unlock(&trace_types_lock);
8885 	mutex_unlock(&event_mutex);
8886 
8887 	return ret;
8888 }
8889 EXPORT_SYMBOL_GPL(trace_array_destroy);
8890 
8891 static int instance_rmdir(const char *name)
8892 {
8893 	struct trace_array *tr;
8894 	int ret;
8895 
8896 	mutex_lock(&event_mutex);
8897 	mutex_lock(&trace_types_lock);
8898 
8899 	ret = -ENODEV;
8900 	tr = trace_array_find(name);
8901 	if (tr)
8902 		ret = __remove_instance(tr);
8903 
8904 	mutex_unlock(&trace_types_lock);
8905 	mutex_unlock(&event_mutex);
8906 
8907 	return ret;
8908 }
8909 
8910 static __init void create_trace_instances(struct dentry *d_tracer)
8911 {
8912 	struct trace_array *tr;
8913 
8914 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8915 							 instance_mkdir,
8916 							 instance_rmdir);
8917 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
8918 		return;
8919 
8920 	mutex_lock(&event_mutex);
8921 	mutex_lock(&trace_types_lock);
8922 
8923 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8924 		if (!tr->name)
8925 			continue;
8926 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
8927 			     "Failed to create instance directory\n"))
8928 			break;
8929 	}
8930 
8931 	mutex_unlock(&trace_types_lock);
8932 	mutex_unlock(&event_mutex);
8933 }
8934 
8935 static void
8936 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8937 {
8938 	struct trace_event_file *file;
8939 	int cpu;
8940 
8941 	trace_create_file("available_tracers", 0444, d_tracer,
8942 			tr, &show_traces_fops);
8943 
8944 	trace_create_file("current_tracer", 0644, d_tracer,
8945 			tr, &set_tracer_fops);
8946 
8947 	trace_create_file("tracing_cpumask", 0644, d_tracer,
8948 			  tr, &tracing_cpumask_fops);
8949 
8950 	trace_create_file("trace_options", 0644, d_tracer,
8951 			  tr, &tracing_iter_fops);
8952 
8953 	trace_create_file("trace", 0644, d_tracer,
8954 			  tr, &tracing_fops);
8955 
8956 	trace_create_file("trace_pipe", 0444, d_tracer,
8957 			  tr, &tracing_pipe_fops);
8958 
8959 	trace_create_file("buffer_size_kb", 0644, d_tracer,
8960 			  tr, &tracing_entries_fops);
8961 
8962 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8963 			  tr, &tracing_total_entries_fops);
8964 
8965 	trace_create_file("free_buffer", 0200, d_tracer,
8966 			  tr, &tracing_free_buffer_fops);
8967 
8968 	trace_create_file("trace_marker", 0220, d_tracer,
8969 			  tr, &tracing_mark_fops);
8970 
8971 	file = __find_event_file(tr, "ftrace", "print");
8972 	if (file && file->dir)
8973 		trace_create_file("trigger", 0644, file->dir, file,
8974 				  &event_trigger_fops);
8975 	tr->trace_marker_file = file;
8976 
8977 	trace_create_file("trace_marker_raw", 0220, d_tracer,
8978 			  tr, &tracing_mark_raw_fops);
8979 
8980 	trace_create_file("trace_clock", 0644, d_tracer, tr,
8981 			  &trace_clock_fops);
8982 
8983 	trace_create_file("tracing_on", 0644, d_tracer,
8984 			  tr, &rb_simple_fops);
8985 
8986 	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8987 			  &trace_time_stamp_mode_fops);
8988 
8989 	tr->buffer_percent = 50;
8990 
8991 	trace_create_file("buffer_percent", 0444, d_tracer,
8992 			tr, &buffer_percent_fops);
8993 
8994 	create_trace_options_dir(tr);
8995 
8996 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8997 	trace_create_maxlat_file(tr, d_tracer);
8998 #endif
8999 
9000 	if (ftrace_create_function_files(tr, d_tracer))
9001 		MEM_FAIL(1, "Could not allocate function filter files");
9002 
9003 #ifdef CONFIG_TRACER_SNAPSHOT
9004 	trace_create_file("snapshot", 0644, d_tracer,
9005 			  tr, &snapshot_fops);
9006 #endif
9007 
9008 	trace_create_file("error_log", 0644, d_tracer,
9009 			  tr, &tracing_err_log_fops);
9010 
9011 	for_each_tracing_cpu(cpu)
9012 		tracing_init_tracefs_percpu(tr, cpu);
9013 
9014 	ftrace_init_tracefs(tr, d_tracer);
9015 }
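/*
 * Minimal sketch of the trace_create_file() pattern used throughout the
 * function above, assuming a hypothetical read-only per-instance file. The
 * file name, example_status_fops and the ->read handler are invented for
 * illustration; trace_create_file() and tracing_open_generic() are the
 * helpers defined earlier in this file.
 *
 *	static const struct file_operations example_status_fops = {
 *		.open	= tracing_open_generic,
 *		.read	= example_status_read,	// hypothetical ->read handler
 *		.llseek	= generic_file_llseek,
 *	};
 *
 *	// Inside init_tracer_tracefs(), the new file would be created with:
 *	trace_create_file("example_status", 0444, d_tracer,
 *			  tr, &example_status_fops);
 */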
9016 
9017 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9018 {
9019 	struct vfsmount *mnt;
9020 	struct file_system_type *type;
9021 
9022 	/*
9023 	 * To maintain backward compatibility for tools that mount
9024 	 * debugfs to get to the tracing facility, tracefs is automatically
9025 	 * mounted to the debugfs/tracing directory.
9026 	 */
9027 	type = get_fs_type("tracefs");
9028 	if (!type)
9029 		return NULL;
9030 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9031 	put_filesystem(type);
9032 	if (IS_ERR(mnt))
9033 		return NULL;
9034 	mntget(mnt);
9035 
9036 	return mnt;
9037 }
9038 
9039 /**
9040  * tracing_init_dentry - initialize top level trace array
9041  *
9042  * This is called when creating files or directories in the tracing
9043  * directory. It is called via fs_initcall() by any of the boot up code
9044  * and returns zero once the top level tracing directory has been set up.
9045  */
9046 int tracing_init_dentry(void)
9047 {
9048 	struct trace_array *tr = &global_trace;
9049 
9050 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9051 		pr_warn("Tracing disabled due to lockdown\n");
9052 		return -EPERM;
9053 	}
9054 
9055 	/* The top level trace array uses NULL as parent */
9056 	if (tr->dir)
9057 		return 0;
9058 
9059 	if (WARN_ON(!tracefs_initialized()))
9060 		return -ENODEV;
9061 
9062 	/*
9063 	 * As there may still be users that expect the tracing
9064 	 * files to exist in debugfs/tracing, we must automount
9065 	 * the tracefs file system there, so older tools still
9066 	 * work with the newer kernel.
9067 	 */
9068 	tr->dir = debugfs_create_automount("tracing", NULL,
9069 					   trace_automount, NULL);
9070 
9071 	return 0;
9072 }
9073 
9074 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9075 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9076 
9077 static void __init trace_eval_init(void)
9078 {
9079 	int len;
9080 
9081 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9082 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9083 }
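/*
 * Sketch of where the eval maps walked above come from: a trace event header
 * can register enum values with TRACE_DEFINE_ENUM() so that the symbolic
 * names land in the __start/__stop_ftrace_eval_maps section and can be
 * resolved when event formats are printed. The enum below is invented for
 * illustration.
 *
 *	enum example_state { EXAMPLE_STATE_IDLE, EXAMPLE_STATE_BUSY };
 *
 *	TRACE_DEFINE_ENUM(EXAMPLE_STATE_IDLE);
 *	TRACE_DEFINE_ENUM(EXAMPLE_STATE_BUSY);
 *
 *	// __print_symbolic() in the event's TP_printk() can then show the
 *	// names instead of raw values in the trace output.
 */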
9084 
9085 #ifdef CONFIG_MODULES
9086 static void trace_module_add_evals(struct module *mod)
9087 {
9088 	if (!mod->num_trace_evals)
9089 		return;
9090 
9091 	/*
9092 	 * Modules with bad taint do not have events created; do
9093 	 * not bother with enums either.
9094 	 */
9095 	if (trace_module_has_bad_taint(mod))
9096 		return;
9097 
9098 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9099 }
9100 
9101 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9102 static void trace_module_remove_evals(struct module *mod)
9103 {
9104 	union trace_eval_map_item *map;
9105 	union trace_eval_map_item **last = &trace_eval_maps;
9106 
9107 	if (!mod->num_trace_evals)
9108 		return;
9109 
9110 	mutex_lock(&trace_eval_mutex);
9111 
9112 	map = trace_eval_maps;
9113 
9114 	while (map) {
9115 		if (map->head.mod == mod)
9116 			break;
9117 		map = trace_eval_jmp_to_tail(map);
9118 		last = &map->tail.next;
9119 		map = map->tail.next;
9120 	}
9121 	if (!map)
9122 		goto out;
9123 
9124 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9125 	kfree(map);
9126  out:
9127 	mutex_unlock(&trace_eval_mutex);
9128 }
9129 #else
9130 static inline void trace_module_remove_evals(struct module *mod) { }
9131 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9132 
9133 static int trace_module_notify(struct notifier_block *self,
9134 			       unsigned long val, void *data)
9135 {
9136 	struct module *mod = data;
9137 
9138 	switch (val) {
9139 	case MODULE_STATE_COMING:
9140 		trace_module_add_evals(mod);
9141 		break;
9142 	case MODULE_STATE_GOING:
9143 		trace_module_remove_evals(mod);
9144 		break;
9145 	}
9146 
9147 	return NOTIFY_OK;
9148 }
9149 
9150 static struct notifier_block trace_module_nb = {
9151 	.notifier_call = trace_module_notify,
9152 	.priority = 0,
9153 };
9154 #endif /* CONFIG_MODULES */
9155 
9156 static __init int tracer_init_tracefs(void)
9157 {
9158 	int ret;
9159 
9160 	trace_access_lock_init();
9161 
9162 	ret = tracing_init_dentry();
9163 	if (ret)
9164 		return 0;
9165 
9166 	event_trace_init();
9167 
9168 	init_tracer_tracefs(&global_trace, NULL);
9169 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9170 
9171 	trace_create_file("tracing_thresh", 0644, NULL,
9172 			&global_trace, &tracing_thresh_fops);
9173 
9174 	trace_create_file("README", 0444, NULL,
9175 			NULL, &tracing_readme_fops);
9176 
9177 	trace_create_file("saved_cmdlines", 0444, NULL,
9178 			NULL, &tracing_saved_cmdlines_fops);
9179 
9180 	trace_create_file("saved_cmdlines_size", 0644, NULL,
9181 			  NULL, &tracing_saved_cmdlines_size_fops);
9182 
9183 	trace_create_file("saved_tgids", 0444, NULL,
9184 			NULL, &tracing_saved_tgids_fops);
9185 
9186 	trace_eval_init();
9187 
9188 	trace_create_eval_file(NULL);
9189 
9190 #ifdef CONFIG_MODULES
9191 	register_module_notifier(&trace_module_nb);
9192 #endif
9193 
9194 #ifdef CONFIG_DYNAMIC_FTRACE
9195 	trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9196 			NULL, &tracing_dyn_info_fops);
9197 #endif
9198 
9199 	create_trace_instances(NULL);
9200 
9201 	update_tracer_options(&global_trace);
9202 
9203 	return 0;
9204 }
9205 
9206 static int trace_panic_handler(struct notifier_block *this,
9207 			       unsigned long event, void *unused)
9208 {
9209 	if (ftrace_dump_on_oops)
9210 		ftrace_dump(ftrace_dump_on_oops);
9211 	return NOTIFY_OK;
9212 }
9213 
9214 static struct notifier_block trace_panic_notifier = {
9215 	.notifier_call  = trace_panic_handler,
9216 	.next           = NULL,
9217 	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
9218 };
9219 
9220 static int trace_die_handler(struct notifier_block *self,
9221 			     unsigned long val,
9222 			     void *data)
9223 {
9224 	switch (val) {
9225 	case DIE_OOPS:
9226 		if (ftrace_dump_on_oops)
9227 			ftrace_dump(ftrace_dump_on_oops);
9228 		break;
9229 	default:
9230 		break;
9231 	}
9232 	return NOTIFY_OK;
9233 }
9234 
9235 static struct notifier_block trace_die_notifier = {
9236 	.notifier_call = trace_die_handler,
9237 	.priority = 200
9238 };
9239 
9240 /*
9241  * printk is set to a max of 1024 bytes; we really don't need it that big.
9242  * Nothing should be printing 1000 characters anyway.
9243  */
9244 #define TRACE_MAX_PRINT		1000
9245 
9246 /*
9247  * Define here KERN_TRACE so that we have one place to modify
9248  * it if we decide to change what log level the ftrace dump
9249  * should be at.
9250  */
9251 #define KERN_TRACE		KERN_EMERG
9252 
9253 void
9254 trace_printk_seq(struct trace_seq *s)
9255 {
9256 	/* Probably should print a warning here. */
9257 	if (s->seq.len >= TRACE_MAX_PRINT)
9258 		s->seq.len = TRACE_MAX_PRINT;
9259 
9260 	/*
9261 	 * More paranoid code. Although the buffer size is set to
9262 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9263 	 * an extra layer of protection.
9264 	 */
9265 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9266 		s->seq.len = s->seq.size - 1;
9267 
9268 	/* Should already be NUL-terminated, but we are paranoid. */
9269 	s->buffer[s->seq.len] = 0;
9270 
9271 	printk(KERN_TRACE "%s", s->buffer);
9272 
9273 	trace_seq_init(s);
9274 }
9275 
9276 void trace_init_global_iter(struct trace_iterator *iter)
9277 {
9278 	iter->tr = &global_trace;
9279 	iter->trace = iter->tr->current_trace;
9280 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
9281 	iter->array_buffer = &global_trace.array_buffer;
9282 
9283 	if (iter->trace && iter->trace->open)
9284 		iter->trace->open(iter);
9285 
9286 	/* Annotate start of buffers if we had overruns */
9287 	if (ring_buffer_overruns(iter->array_buffer->buffer))
9288 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
9289 
9290 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
9291 	if (trace_clocks[iter->tr->clock_id].in_ns)
9292 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9293 }
9294 
9295 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9296 {
9297 	/* use static because iter can be a bit big for the stack */
9298 	static struct trace_iterator iter;
9299 	static atomic_t dump_running;
9300 	struct trace_array *tr = &global_trace;
9301 	unsigned int old_userobj;
9302 	unsigned long flags;
9303 	int cnt = 0, cpu;
9304 
9305 	/* Only allow one dump user at a time. */
9306 	if (atomic_inc_return(&dump_running) != 1) {
9307 		atomic_dec(&dump_running);
9308 		return;
9309 	}
9310 
9311 	/*
9312 	 * Always turn off tracing when we dump.
9313 	 * We don't need to show trace output of what happens
9314 	 * between multiple crashes.
9315 	 *
9316 	 * If the user does a sysrq-z, then they can re-enable
9317 	 * tracing with echo 1 > tracing_on.
9318 	 */
9319 	tracing_off();
9320 
9321 	local_irq_save(flags);
9322 	printk_nmi_direct_enter();
9323 
9324 	/* Simulate the iterator */
9325 	trace_init_global_iter(&iter);
9326 	/* Cannot use kmalloc for iter.temp */
9327 	iter.temp = static_temp_buf;
9328 	iter.temp_size = STATIC_TEMP_BUF_SIZE;
9329 
9330 	for_each_tracing_cpu(cpu) {
9331 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9332 	}
9333 
9334 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9335 
9336 	/* don't look at user memory in panic mode */
9337 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9338 
9339 	switch (oops_dump_mode) {
9340 	case DUMP_ALL:
9341 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9342 		break;
9343 	case DUMP_ORIG:
9344 		iter.cpu_file = raw_smp_processor_id();
9345 		break;
9346 	case DUMP_NONE:
9347 		goto out_enable;
9348 	default:
9349 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9350 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
9351 	}
9352 
9353 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
9354 
9355 	/* Did function tracer already get disabled? */
9356 	if (ftrace_is_dead()) {
9357 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9358 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9359 	}
9360 
9361 	/*
9362 	 * We need to stop all tracing on all CPUs to read
9363 	 * the next buffer. This is a bit expensive, but it is
9364 	 * not done often. We read everything we can,
9365 	 * and then release the locks again.
9366 	 */
9367 
9368 	while (!trace_empty(&iter)) {
9369 
9370 		if (!cnt)
9371 			printk(KERN_TRACE "---------------------------------\n");
9372 
9373 		cnt++;
9374 
9375 		trace_iterator_reset(&iter);
9376 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
9377 
9378 		if (trace_find_next_entry_inc(&iter) != NULL) {
9379 			int ret;
9380 
9381 			ret = print_trace_line(&iter);
9382 			if (ret != TRACE_TYPE_NO_CONSUME)
9383 				trace_consume(&iter);
9384 		}
9385 		touch_nmi_watchdog();
9386 
9387 		trace_printk_seq(&iter.seq);
9388 	}
9389 
9390 	if (!cnt)
9391 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
9392 	else
9393 		printk(KERN_TRACE "---------------------------------\n");
9394 
9395  out_enable:
9396 	tr->trace_flags |= old_userobj;
9397 
9398 	for_each_tracing_cpu(cpu) {
9399 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9400 	}
9401 	atomic_dec(&dump_running);
9402 	printk_nmi_direct_exit();
9403 	local_irq_restore(flags);
9404 }
9405 EXPORT_SYMBOL_GPL(ftrace_dump);
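/*
 * Illustrative sketch (not part of the original file): since ftrace_dump()
 * is exported, a driver can dump the ring buffer from a fatal error path.
 * The surrounding function is hypothetical; DUMP_ALL and DUMP_ORIG are the
 * enum ftrace_dump_mode values handled above.
 *
 *	static void example_fatal_error(void)
 *	{
 *		pr_emerg("example: unrecoverable state, dumping trace\n");
 *		// Dump every CPU's buffer; DUMP_ORIG would dump only the
 *		// CPU that triggered the dump.
 *		ftrace_dump(DUMP_ALL);
 *	}
 */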
9406 
9407 int trace_run_command(const char *buf, int (*createfn)(int, char **))
9408 {
9409 	char **argv;
9410 	int argc, ret;
9411 
9412 	argc = 0;
9413 	ret = 0;
9414 	argv = argv_split(GFP_KERNEL, buf, &argc);
9415 	if (!argv)
9416 		return -ENOMEM;
9417 
9418 	if (argc)
9419 		ret = createfn(argc, argv);
9420 
9421 	argv_free(argv);
9422 
9423 	return ret;
9424 }
9425 
9426 #define WRITE_BUFSIZE  4096
9427 
9428 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9429 				size_t count, loff_t *ppos,
9430 				int (*createfn)(int, char **))
9431 {
9432 	char *kbuf, *buf, *tmp;
9433 	int ret = 0;
9434 	size_t done = 0;
9435 	size_t size;
9436 
9437 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9438 	if (!kbuf)
9439 		return -ENOMEM;
9440 
9441 	while (done < count) {
9442 		size = count - done;
9443 
9444 		if (size >= WRITE_BUFSIZE)
9445 			size = WRITE_BUFSIZE - 1;
9446 
9447 		if (copy_from_user(kbuf, buffer + done, size)) {
9448 			ret = -EFAULT;
9449 			goto out;
9450 		}
9451 		kbuf[size] = '\0';
9452 		buf = kbuf;
9453 		do {
9454 			tmp = strchr(buf, '\n');
9455 			if (tmp) {
9456 				*tmp = '\0';
9457 				size = tmp - buf + 1;
9458 			} else {
9459 				size = strlen(buf);
9460 				if (done + size < count) {
9461 					if (buf != kbuf)
9462 						break;
9463 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9464 					pr_warn("Line length is too long: Should be less than %d\n",
9465 						WRITE_BUFSIZE - 2);
9466 					ret = -EINVAL;
9467 					goto out;
9468 				}
9469 			}
9470 			done += size;
9471 
9472 			/* Remove comments */
9473 			tmp = strchr(buf, '#');
9474 
9475 			if (tmp)
9476 				*tmp = '\0';
9477 
9478 			ret = trace_run_command(buf, createfn);
9479 			if (ret)
9480 				goto out;
9481 			buf += size;
9482 
9483 		} while (done < count);
9484 	}
9485 	ret = done;
9486 
9487 out:
9488 	kfree(kbuf);
9489 
9490 	return ret;
9491 }
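/*
 * Sketch of how trace_parse_run_command() is typically wired up, assuming a
 * hypothetical command file. The createfn is invoked once per parsed line
 * with a split argc/argv, exactly as trace_run_command() does above; the
 * names example_create_cmd and example_write are invented here.
 *
 *	static int example_create_cmd(int argc, char **argv)
 *	{
 *		if (argc < 1)
 *			return -EINVAL;
 *		pr_info("example: command '%s' with %d argument(s)\n",
 *			argv[0], argc - 1);
 *		return 0;
 *	}
 *
 *	static ssize_t example_write(struct file *file, const char __user *buffer,
 *				     size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buffer, count, ppos,
 *					       example_create_cmd);
 *	}
 */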
9492 
9493 __init static int tracer_alloc_buffers(void)
9494 {
9495 	int ring_buf_size;
9496 	int ret = -ENOMEM;
9497 
9498 
9499 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9500 		pr_warn("Tracing disabled due to lockdown\n");
9501 		return -EPERM;
9502 	}
9503 
9504 	/*
9505 	 * Make sure we don't accidentally add more trace options
9506 	 * than we have bits for.
9507 	 */
9508 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9509 
9510 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9511 		goto out;
9512 
9513 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9514 		goto out_free_buffer_mask;
9515 
9516 	/* Only allocate trace_printk buffers if a trace_printk exists */
9517 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9518 		/* Must be called before global_trace.buffer is allocated */
9519 		trace_printk_init_buffers();
9520 
9521 	/* To save memory, keep the ring buffer size to its minimum */
9522 	if (ring_buffer_expanded)
9523 		ring_buf_size = trace_buf_size;
9524 	else
9525 		ring_buf_size = 1;
9526 
9527 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9528 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9529 
9530 	raw_spin_lock_init(&global_trace.start_lock);
9531 
9532 	/*
9533 	 * The prepare callback allocates some memory for the ring buffer. We
9534 	 * don't free the buffer if the CPU goes down. If we were to free
9535 	 * the buffer, then the user would lose any trace that was in the
9536 	 * buffer. The memory will be removed once the "instance" is removed.
9537 	 */
9538 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9539 				      "trace/RB:prepare", trace_rb_cpu_prepare,
9540 				      NULL);
9541 	if (ret < 0)
9542 		goto out_free_cpumask;
9543 	/* Used for event triggers */
9544 	ret = -ENOMEM;
9545 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9546 	if (!temp_buffer)
9547 		goto out_rm_hp_state;
9548 
9549 	if (trace_create_savedcmd() < 0)
9550 		goto out_free_temp_buffer;
9551 
9552 	/* TODO: make the number of buffers hot pluggable with CPUs */
9553 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9554 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9555 		goto out_free_savedcmd;
9556 	}
9557 
9558 	if (global_trace.buffer_disabled)
9559 		tracing_off();
9560 
9561 	if (trace_boot_clock) {
9562 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
9563 		if (ret < 0)
9564 			pr_warn("Trace clock %s not defined, going back to default\n",
9565 				trace_boot_clock);
9566 	}
9567 
9568 	/*
9569 	 * register_tracer() might reference current_trace, so it
9570 	 * needs to be set before we register anything. This is
9571 	 * just a bootstrap of current_trace anyway.
9572 	 */
9573 	global_trace.current_trace = &nop_trace;
9574 
9575 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9576 
9577 	ftrace_init_global_array_ops(&global_trace);
9578 
9579 	init_trace_flags_index(&global_trace);
9580 
9581 	register_tracer(&nop_trace);
9582 
9583 	/* Function tracing may start here (via kernel command line) */
9584 	init_function_trace();
9585 
9586 	/* All seems OK, enable tracing */
9587 	tracing_disabled = 0;
9588 
9589 	atomic_notifier_chain_register(&panic_notifier_list,
9590 				       &trace_panic_notifier);
9591 
9592 	register_die_notifier(&trace_die_notifier);
9593 
9594 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9595 
9596 	INIT_LIST_HEAD(&global_trace.systems);
9597 	INIT_LIST_HEAD(&global_trace.events);
9598 	INIT_LIST_HEAD(&global_trace.hist_vars);
9599 	INIT_LIST_HEAD(&global_trace.err_log);
9600 	list_add(&global_trace.list, &ftrace_trace_arrays);
9601 
9602 	apply_trace_boot_options();
9603 
9604 	register_snapshot_cmd();
9605 
9606 	return 0;
9607 
9608 out_free_savedcmd:
9609 	free_saved_cmdlines_buffer(savedcmd);
9610 out_free_temp_buffer:
9611 	ring_buffer_free(temp_buffer);
9612 out_rm_hp_state:
9613 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9614 out_free_cpumask:
9615 	free_cpumask_var(global_trace.tracing_cpumask);
9616 out_free_buffer_mask:
9617 	free_cpumask_var(tracing_buffer_mask);
9618 out:
9619 	return ret;
9620 }
9621 
9622 void __init early_trace_init(void)
9623 {
9624 	if (tracepoint_printk) {
9625 		tracepoint_print_iter =
9626 			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9627 		if (MEM_FAIL(!tracepoint_print_iter,
9628 			     "Failed to allocate trace iterator\n"))
9629 			tracepoint_printk = 0;
9630 		else
9631 			static_key_enable(&tracepoint_printk_key.key);
9632 	}
9633 	tracer_alloc_buffers();
9634 }
9635 
9636 void __init trace_init(void)
9637 {
9638 	trace_event_init();
9639 }
9640 
9641 __init static int clear_boot_tracer(void)
9642 {
9643 	/*
9644 	 * The default bootup tracer name points into an init section.
9645 	 * This function is called at late_initcall time. If the boot
9646 	 * tracer was never registered, clear the pointer out, to prevent
9647 	 * later registrations from accessing memory that is
9648 	 * about to be freed.
9649 	 */
9650 	if (!default_bootup_tracer)
9651 		return 0;
9652 
9653 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9654 	       default_bootup_tracer);
9655 	default_bootup_tracer = NULL;
9656 
9657 	return 0;
9658 }
9659 
9660 fs_initcall(tracer_init_tracefs);
9661 late_initcall_sync(clear_boot_tracer);
9662 
9663 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9664 __init static int tracing_set_default_clock(void)
9665 {
9666 	/* sched_clock_stable() is determined in late_initcall */
9667 	if (!trace_boot_clock && !sched_clock_stable()) {
9668 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
9669 			pr_warn("Can not set tracing clock due to lockdown\n");
9670 			return -EPERM;
9671 		}
9672 
9673 		printk(KERN_WARNING
9674 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
9675 		       "If you want to keep using the local clock, then add:\n"
9676 		       "  \"trace_clock=local\"\n"
9677 		       "on the kernel command line\n");
9678 		tracing_set_clock(&global_trace, "global");
9679 	}
9680 
9681 	return 0;
9682 }
9683 late_initcall_sync(tracing_set_default_clock);
9684 #endif
9685