1 /*
2  * Performance events:
3  *
4  *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
5  *    Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
6  *    Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
7  *
8  * Data type definitions, declarations, prototypes.
9  *
10  *    Started by: Thomas Gleixner and Ingo Molnar
11  *
12  * For licencing details see kernel-base/COPYING
13  */
14 #ifndef _LINUX_PERF_EVENT_H
15 #define _LINUX_PERF_EVENT_H
16 
17 /* ANDROID_CHANGE_BEGIN */
18 #ifndef __APPLE__
19 /* Suppress kernel-name space pollution in <linux/types.h> below */
20 #include <features.h>
21 #include <linux/types.h>
22 #include <linux/ioctl.h>
23 #include <asm/byteorder.h>
24 #else
25 #include "../types.h"
26 #endif
27 /* ANDROID_CHANGE_END */
28 
29 /*
30  * User-space ABI bits:
31  */
32 
33 /*
34  * attr.type
35  */
36 enum perf_type_id {
37 	PERF_TYPE_HARDWARE			= 0,
38 	PERF_TYPE_SOFTWARE			= 1,
39 	PERF_TYPE_TRACEPOINT			= 2,
40 	PERF_TYPE_HW_CACHE			= 3,
41 	PERF_TYPE_RAW				= 4,
42 	PERF_TYPE_BREAKPOINT			= 5,
43 
44 	PERF_TYPE_MAX,				/* non-ABI */
45 };
46 
47 /*
48  * Generalized performance event event_id types, used by the
49  * attr.event_id parameter of the sys_perf_event_open()
50  * syscall:
51  */
52 enum perf_hw_id {
53 	/*
54 	 * Common hardware events, generalized by the kernel:
55 	 */
56 	PERF_COUNT_HW_CPU_CYCLES		= 0,
57 	PERF_COUNT_HW_INSTRUCTIONS		= 1,
58 	PERF_COUNT_HW_CACHE_REFERENCES		= 2,
59 	PERF_COUNT_HW_CACHE_MISSES		= 3,
60 	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
61 	PERF_COUNT_HW_BRANCH_MISSES		= 5,
62 	PERF_COUNT_HW_BUS_CYCLES		= 6,
63 	PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	= 7,
64 	PERF_COUNT_HW_STALLED_CYCLES_BACKEND	= 8,
65 
66 	PERF_COUNT_HW_MAX,			/* non-ABI */
67 };
68 
69 /*
70  * Generalized hardware cache events:
71  *
72  *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
73  *       { read, write, prefetch } x
74  *       { accesses, misses }
75  */
76 enum perf_hw_cache_id {
77 	PERF_COUNT_HW_CACHE_L1D			= 0,
78 	PERF_COUNT_HW_CACHE_L1I			= 1,
79 	PERF_COUNT_HW_CACHE_LL			= 2,
80 	PERF_COUNT_HW_CACHE_DTLB		= 3,
81 	PERF_COUNT_HW_CACHE_ITLB		= 4,
82 	PERF_COUNT_HW_CACHE_BPU			= 5,
83 
84 	PERF_COUNT_HW_CACHE_MAX,		/* non-ABI */
85 };
86 
87 enum perf_hw_cache_op_id {
88 	PERF_COUNT_HW_CACHE_OP_READ		= 0,
89 	PERF_COUNT_HW_CACHE_OP_WRITE		= 1,
90 	PERF_COUNT_HW_CACHE_OP_PREFETCH		= 2,
91 
92 	PERF_COUNT_HW_CACHE_OP_MAX,		/* non-ABI */
93 };
94 
95 enum perf_hw_cache_op_result_id {
96 	PERF_COUNT_HW_CACHE_RESULT_ACCESS	= 0,
97 	PERF_COUNT_HW_CACHE_RESULT_MISS		= 1,
98 
99 	PERF_COUNT_HW_CACHE_RESULT_MAX,		/* non-ABI */
100 };
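
/*
 * Illustrative sketch (not part of this header): building attr.config for a
 * PERF_TYPE_HW_CACHE event. The (id, op, result) triple is packed as
 * id | (op << 8) | (result << 16) -- the encoding used by the perf tool and
 * documented in perf_event_open(2), not spelled out in this header itself.
 * Guarded out so it is never compiled as part of the header.
 */
#if 0
static __u64 hw_cache_config(enum perf_hw_cache_id id,
			     enum perf_hw_cache_op_id op,
			     enum perf_hw_cache_op_result_id result)
{
	return (__u64)id | ((__u64)op << 8) | ((__u64)result << 16);
}

/*
 * e.g. L1 data cache read misses:
 *
 *	attr.type   = PERF_TYPE_HW_CACHE;
 *	attr.config = hw_cache_config(PERF_COUNT_HW_CACHE_L1D,
 *				      PERF_COUNT_HW_CACHE_OP_READ,
 *				      PERF_COUNT_HW_CACHE_RESULT_MISS);
 */
#endif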
101 
102 /*
103  * Special "software" events provided by the kernel, even if the hardware
104  * does not support performance events. These events measure various
105  * physical and software events of the kernel (and allow them to be
106  * profiled as well):
107  */
108 enum perf_sw_ids {
109 	PERF_COUNT_SW_CPU_CLOCK			= 0,
110 	PERF_COUNT_SW_TASK_CLOCK		= 1,
111 	PERF_COUNT_SW_PAGE_FAULTS		= 2,
112 	PERF_COUNT_SW_CONTEXT_SWITCHES		= 3,
113 	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
114 	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
115 	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
116 	PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
117 	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
118 
119 	PERF_COUNT_SW_MAX,			/* non-ABI */
120 };
121 
122 /*
123  * Bits that can be set in attr.sample_type to request information
124  * in the overflow packets.
125  */
126 enum perf_event_sample_format {
127 	PERF_SAMPLE_IP				= 1U << 0,
128 	PERF_SAMPLE_TID				= 1U << 1,
129 	PERF_SAMPLE_TIME			= 1U << 2,
130 	PERF_SAMPLE_ADDR			= 1U << 3,
131 	PERF_SAMPLE_READ			= 1U << 4,
132 	PERF_SAMPLE_CALLCHAIN			= 1U << 5,
133 	PERF_SAMPLE_ID				= 1U << 6,
134 	PERF_SAMPLE_CPU				= 1U << 7,
135 	PERF_SAMPLE_PERIOD			= 1U << 8,
136 	PERF_SAMPLE_STREAM_ID			= 1U << 9,
137 	PERF_SAMPLE_RAW				= 1U << 10,
138 
139 	PERF_SAMPLE_MAX = 1U << 11,		/* non-ABI */
140 };
141 
142 /*
143  * The format of the data returned by read() on a perf event fd,
144  * as specified by attr.read_format:
145  *
146  * struct read_format {
147  *	{ u64		value;
148  *	  { u64		time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
149  *	  { u64		time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
150  *	  { u64		id;           } && PERF_FORMAT_ID
151  *	} && !PERF_FORMAT_GROUP
152  *
153  *	{ u64		nr;
154  *	  { u64		time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
155  *	  { u64		time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
156  *	  { u64		value;
157  *	    { u64	id;           } && PERF_FORMAT_ID
158  *	  }		cntr[nr];
159  *	} && PERF_FORMAT_GROUP
160  * };
161  */
162 enum perf_event_read_format {
163 	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
164 	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
165 	PERF_FORMAT_ID				= 1U << 2,
166 	PERF_FORMAT_GROUP			= 1U << 3,
167 
168 	PERF_FORMAT_MAX = 1U << 4,		/* non-ABI */
169 };
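
/*
 * Illustrative sketch (not part of this header): consuming the read()
 * layout above for a single (non-GROUP) event opened with
 * read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING.
 * read(2)/<unistd.h> and the enabled/running scaling heuristic (the usual
 * way to compensate for counter multiplexing) are assumptions, not part of
 * this header. Guarded out so it is never compiled.
 */
#if 0
#include <unistd.h>

struct read_single {
	__u64	value;
	__u64	time_enabled;
	__u64	time_running;
};

static __u64 read_scaled_count(int perf_fd)
{
	struct read_single rf;

	if (read(perf_fd, &rf, sizeof(rf)) != sizeof(rf))
		return 0;

	/* The event may have been multiplexed off the PMU part of the time. */
	if (rf.time_running && rf.time_running < rf.time_enabled)
		return (__u64)((double)rf.value *
			       (double)rf.time_enabled / (double)rf.time_running);

	return rf.value;
}
#endif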
170 
171 #define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
172 
173 /*
174  * Hardware event_id to monitor via a performance monitoring event:
175  */
176 struct perf_event_attr {
177 
178 	/*
179 	 * Major type: hardware/software/tracepoint/etc.
180 	 */
181 	__u32			type;
182 
183 	/*
184 	 * Size of the attr structure, for fwd/bwd compat.
185 	 */
186 	__u32			size;
187 
188 	/*
189 	 * Type specific configuration information.
190 	 */
191 	__u64			config;
192 
193 	union {
194 		__u64		sample_period;
195 		__u64		sample_freq;
196 	};
197 
198 	__u64			sample_type;
199 	__u64			read_format;
200 
201 	__u64			disabled       :  1, /* off by default        */
202 				inherit	       :  1, /* children inherit it   */
203 				pinned	       :  1, /* must always be on PMU */
204 				exclusive      :  1, /* only group on PMU     */
205 				exclude_user   :  1, /* don't count user      */
206 				exclude_kernel :  1, /* ditto kernel          */
207 				exclude_hv     :  1, /* ditto hypervisor      */
208 				exclude_idle   :  1, /* don't count when idle */
209 				mmap           :  1, /* include mmap data     */
210 				comm	       :  1, /* include comm data     */
211 				freq           :  1, /* use freq, not period  */
212 				inherit_stat   :  1, /* per task counts       */
213 				enable_on_exec :  1, /* next exec enables     */
214 				task           :  1, /* trace fork/exit       */
215 				watermark      :  1, /* wakeup_watermark      */
216 				/*
217 				 * precise_ip:
218 				 *
219 				 *  0 - SAMPLE_IP can have arbitrary skid
220 				 *  1 - SAMPLE_IP must have constant skid
221 				 *  2 - SAMPLE_IP requested to have 0 skid
222 				 *  3 - SAMPLE_IP must have 0 skid
223 				 *
224 				 *  See also PERF_RECORD_MISC_EXACT_IP
225 				 */
226 				precise_ip     :  2, /* skid constraint       */
227 				mmap_data      :  1, /* non-exec mmap data    */
228 				sample_id_all  :  1, /* sample_type all events */
229 
230 				__reserved_1   : 45;
231 
232 	union {
233 		__u32		wakeup_events;	  /* wakeup every n events */
234 		__u32		wakeup_watermark; /* bytes before wakeup   */
235 	};
236 
237 	__u32			bp_type;
238 	union {
239 		__u64		bp_addr;
240 		__u64		config1; /* extension of config */
241 	};
242 	union {
243 		__u64		bp_len;
244 		__u64		config2; /* extension of config1 */
245 	};
246 };
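
/*
 * Illustrative sketch (not part of this header): opening a hardware cycle
 * counter for the calling thread. Linux provides no glibc wrapper, so
 * syscall(2) with __NR_perf_event_open is assumed here (see
 * perf_event_open(2)); error handling is left out. Guarded out so it is
 * never compiled.
 */
#if 0
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

static int open_cycle_counter(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type	= PERF_TYPE_HARDWARE;
	attr.size	= sizeof(attr);		/* lets the kernel do fwd/bwd compat checks */
	attr.config	= PERF_COUNT_HW_CPU_CYCLES;
	attr.disabled	= 1;			/* enable explicitly via ioctl later */
	attr.exclude_kernel = 1;
	attr.exclude_hv	= 1;

	/* pid = 0 (this thread), cpu = -1 (any CPU), no group, no flags */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}
#endif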
247 
248 /*
249  * Ioctls that can be done on a perf event fd:
250  */
251 #define PERF_EVENT_IOC_ENABLE		_IO ('$', 0)
252 #define PERF_EVENT_IOC_DISABLE		_IO ('$', 1)
253 #define PERF_EVENT_IOC_REFRESH		_IO ('$', 2)
254 #define PERF_EVENT_IOC_RESET		_IO ('$', 3)
255 #define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, __u64)
256 #define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
257 #define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
258 
259 enum perf_event_ioc_flags {
260 	PERF_IOC_FLAG_GROUP		= 1U << 0,
261 };
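
/*
 * Illustrative sketch (not part of this header): a typical measurement
 * window built from the ioctls above, assuming an event fd opened with
 * attr.disabled = 1 (as in the perf_event_attr sketch earlier) and
 * read_format = 0 so read() returns a bare u64. workload_under_test() is a
 * hypothetical placeholder. Guarded out so it is never compiled.
 */
#if 0
#include <unistd.h>
#include <sys/ioctl.h>

static __u64 measure(int perf_fd)
{
	__u64 count = 0;

	ioctl(perf_fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(perf_fd, PERF_EVENT_IOC_ENABLE, 0);

	workload_under_test();			/* hypothetical code being measured */

	ioctl(perf_fd, PERF_EVENT_IOC_DISABLE, 0);
	read(perf_fd, &count, sizeof(count));

	return count;
}
#endif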
262 
263 /*
264  * Structure of the page that can be mapped via mmap
265  */
266 struct perf_event_mmap_page {
267 	__u32	version;		/* version number of this structure */
268 	__u32	compat_version;		/* lowest version this is compat with */
269 
270 	/*
271 	 * Bits needed to read the hw events in user-space.
272 	 *
273 	 *   u32 seq;
274 	 *   s64 count;
275 	 *
276 	 *   do {
277 	 *     seq = pc->lock;
278 	 *
279 	 *     barrier()
280 	 *     if (pc->index) {
281 	 *       count = pmc_read(pc->index - 1);
282 	 *       count += pc->offset;
283 	 *     } else
284 	 *       goto regular_read;
285 	 *
286 	 *     barrier();
287 	 *   } while (pc->lock != seq);
288 	 *
289 	 * NOTE: for obvious reason this only works on self-monitoring
290 	 *       processes.
291 	 */
292 	__u32	lock;			/* seqlock for synchronization */
293 	__u32	index;			/* hardware event identifier */
294 	__s64	offset;			/* add to hardware event value */
295 	__u64	time_enabled;		/* time event active */
296 	__u64	time_running;		/* time event on cpu */
297 
298 		/*
299 		 * Hole for extension of the self monitor capabilities
300 		 */
301 
302 	__u64	__reserved[123];	/* align to 1k */
303 
304 	/*
305 	 * Control data for the mmap() data buffer.
306 	 *
307 	 * User-space reading the @data_head value should issue an rmb(), on
308 	 * SMP capable platforms, after reading this value -- see
309 	 * perf_event_wakeup().
310 	 *
311 	 * When the mapping is PROT_WRITE the @data_tail value should be
312 	 * written by userspace to reflect the last read data. In this case
313 	 * the kernel will not over-write unread data.
314 	 */
315 	__u64   data_head;		/* head in the data section */
316 	__u64	data_tail;		/* user-space written tail */
317 };
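
/*
 * Illustrative sketch (not part of this header): consuming the mmap()ed
 * ring buffer through the page above. It assumes the fd was mapped
 * PROT_READ|PROT_WRITE over 1 + 2^n pages (this metadata page plus the
 * data area), a 4K page size, and uses __sync_synchronize() as a stand-in
 * for the rmb()/write ordering described in the comment. handle_record()
 * is a hypothetical callback (sketched after enum perf_event_type below),
 * and struct perf_event_header is defined further down in this file.
 * Guarded out so it is never compiled.
 */
#if 0
static void drain_ring_buffer(struct perf_event_mmap_page *pg,
			      unsigned long data_size /* 2^n pages, in bytes */)
{
	char *data = (char *)pg + 4096;	/* data area follows the metadata page */
	__u64 head, tail;

	head = pg->data_head;
	__sync_synchronize();		/* rmb(): order record reads after data_head */
	tail = pg->data_tail;

	while (tail < head) {
		struct perf_event_header *hdr = (struct perf_event_header *)
			(data + (tail & (data_size - 1)));

		/* note: a record may wrap around the end of the data area */
		handle_record(hdr);
		tail += hdr->size;
	}

	__sync_synchronize();		/* finish reading before... */
	pg->data_tail = tail;		/* ...telling the kernel the data was consumed */
}
#endif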
318 
319 #define PERF_RECORD_MISC_CPUMODE_MASK		(7 << 0)
320 #define PERF_RECORD_MISC_CPUMODE_UNKNOWN	(0 << 0)
321 #define PERF_RECORD_MISC_KERNEL			(1 << 0)
322 #define PERF_RECORD_MISC_USER			(2 << 0)
323 #define PERF_RECORD_MISC_HYPERVISOR		(3 << 0)
324 #define PERF_RECORD_MISC_GUEST_KERNEL		(4 << 0)
325 #define PERF_RECORD_MISC_GUEST_USER		(5 << 0)
326 
327 /*
328  * Indicates that the content of PERF_SAMPLE_IP points to
329  * the actual instruction that triggered the event. See also
330  * perf_event_attr::precise_ip.
331  */
332 #define PERF_RECORD_MISC_EXACT_IP		(1 << 14)
333 /*
334  * Reserve the last bit to indicate some extended misc field
335  */
336 #define PERF_RECORD_MISC_EXT_RESERVED		(1 << 15)
337 
338 struct perf_event_header {
339 	__u32	type;
340 	__u16	misc;
341 	__u16	size;
342 };
343 
344 enum perf_event_type {
345 
346 	/*
347 	 * If perf_event_attr.sample_id_all is set then all event types will
348 	 * carry the sample_type-selected fields that identify where/when an
349 	 * event took place (TID, TIME, ID, CPU, STREAM_ID), as described for
350 	 * PERF_RECORD_SAMPLE below. They are stashed just after the
351 	 * perf_event_header and the fields already present for the existing
352 	 * record type, i.e. at the end of the payload. That way a newer
353 	 * perf.data file will be supported by older perf tools, with these
354 	 * new optional fields being ignored.
355 	 *
356 	 * The MMAP events record the PROT_EXEC mappings so that we can
357 	 * correlate userspace IPs to code. They have the following structure:
358 	 *
359 	 * struct {
360 	 *	struct perf_event_header	header;
361 	 *
362 	 *	u32				pid, tid;
363 	 *	u64				addr;
364 	 *	u64				len;
365 	 *	u64				pgoff;
366 	 *	char				filename[];
367 	 * };
368 	 */
369 	PERF_RECORD_MMAP			= 1,
370 
371 	/*
372 	 * struct {
373 	 *	struct perf_event_header	header;
374 	 *	u64				id;
375 	 *	u64				lost;
376 	 * };
377 	 */
378 	PERF_RECORD_LOST			= 2,
379 
380 	/*
381 	 * struct {
382 	 *	struct perf_event_header	header;
383 	 *
384 	 *	u32				pid, tid;
385 	 *	char				comm[];
386 	 * };
387 	 */
388 	PERF_RECORD_COMM			= 3,
389 
390 	/*
391 	 * struct {
392 	 *	struct perf_event_header	header;
393 	 *	u32				pid, ppid;
394 	 *	u32				tid, ptid;
395 	 *	u64				time;
396 	 * };
397 	 */
398 	PERF_RECORD_EXIT			= 4,
399 
400 	/*
401 	 * struct {
402 	 *	struct perf_event_header	header;
403 	 *	u64				time;
404 	 *	u64				id;
405 	 *	u64				stream_id;
406 	 * };
407 	 */
408 	PERF_RECORD_THROTTLE			= 5,
409 	PERF_RECORD_UNTHROTTLE			= 6,
410 
411 	/*
412 	 * struct {
413 	 *	struct perf_event_header	header;
414 	 *	u32				pid, ppid;
415 	 *	u32				tid, ptid;
416 	 *	u64				time;
417 	 * };
418 	 */
419 	PERF_RECORD_FORK			= 7,
420 
421 	/*
422 	 * struct {
423 	 *	struct perf_event_header	header;
424 	 *	u32				pid, tid;
425 	 *
426 	 *	struct read_format		values;
427 	 * };
428 	 */
429 	PERF_RECORD_READ			= 8,
430 
431 	/*
432 	 * struct {
433 	 *	struct perf_event_header	header;
434 	 *
435 	 *	{ u64			ip;	  } && PERF_SAMPLE_IP
436 	 *	{ u32			pid, tid; } && PERF_SAMPLE_TID
437 	 *	{ u64			time;     } && PERF_SAMPLE_TIME
438 	 *	{ u64			addr;     } && PERF_SAMPLE_ADDR
439 	 *	{ u64			id;	  } && PERF_SAMPLE_ID
440 	 *	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
441 	 *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
442 	 *	{ u64			period;   } && PERF_SAMPLE_PERIOD
443 	 *
444 	 *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
445 	 *
446 	 *	{ u64			nr,
447 	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
448 	 *
449 	 *	#
450 	 *	# The RAW record below is opaque data wrt the ABI
451 	 *	#
452 	 *	# That is, the ABI doesn't make any promises with respect to
453 	 *	# the stability of its content, it may vary depending
454 	 *	# on event, hardware, kernel version and phase of
455 	 *	# the moon.
456 	 *	#
457 	 *	# In other words, PERF_SAMPLE_RAW contents are not an ABI.
458 	 *	#
459 	 *
460 	 *	{ u32			size;
461 	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
462 	 * };
463 	 */
464 	PERF_RECORD_SAMPLE			= 9,
465 
466 	PERF_RECORD_MAX,			/* non-ABI */
467 };
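
/*
 * Illustrative sketch (not part of this header): decoding one record pulled
 * out of the ring buffer (the hypothetical handle_record() used in the mmap
 * sketch above), for an event opened with
 * sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME.
 * Only the requested sample_type fields are present, in the order listed
 * for PERF_RECORD_SAMPLE above. Guarded out so it is never compiled.
 */
#if 0
#include <stdio.h>

struct sample_ip_tid_time {
	struct perf_event_header	header;
	__u64				ip;		/* PERF_SAMPLE_IP */
	__u32				pid, tid;	/* PERF_SAMPLE_TID */
	__u64				time;		/* PERF_SAMPLE_TIME */
};

static void handle_record(struct perf_event_header *hdr)
{
	switch (hdr->type) {
	case PERF_RECORD_SAMPLE: {
		struct sample_ip_tid_time *s = (void *)hdr;

		printf("sample: ip=%#llx pid=%u tid=%u time=%llu\n",
		       (unsigned long long)s->ip, s->pid, s->tid,
		       (unsigned long long)s->time);
		break;
	}
	case PERF_RECORD_LOST:
		/* u64 id; u64 lost; follow the header, see above */
		break;
	default:
		break;
	}
}
#endif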
468 
469 enum perf_callchain_context {
470 	PERF_CONTEXT_HV			= (__u64)-32,
471 	PERF_CONTEXT_KERNEL		= (__u64)-128,
472 	PERF_CONTEXT_USER		= (__u64)-512,
473 
474 	PERF_CONTEXT_GUEST		= (__u64)-2048,
475 	PERF_CONTEXT_GUEST_KERNEL	= (__u64)-2176,
476 	PERF_CONTEXT_GUEST_USER		= (__u64)-2560,
477 
478 	PERF_CONTEXT_MAX		= (__u64)-4095,
479 };
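
/*
 * Illustrative sketch (not part of this header): walking a
 * PERF_SAMPLE_CALLCHAIN body (u64 nr; u64 ips[nr]). Entries that are not
 * return addresses carry one of the PERF_CONTEXT_* values above and mark
 * which context (kernel, user, guest, ...) the following frames belong to;
 * testing ip >= PERF_CONTEXT_MAX is the convention the perf tool uses, not
 * something spelled out in this header. Guarded out so it is never compiled.
 */
#if 0
#include <stdio.h>

static void print_callchain(const __u64 *ips, __u64 nr)
{
	__u64 i;

	for (i = 0; i < nr; i++) {
		if (ips[i] >= (__u64)PERF_CONTEXT_MAX) {
			/* context marker, e.g. PERF_CONTEXT_KERNEL or _USER */
			printf("-- context %#llx --\n", (unsigned long long)ips[i]);
			continue;
		}
		printf("  %#llx\n", (unsigned long long)ips[i]);
	}
}
#endif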
480 
481 #define PERF_FLAG_FD_NO_GROUP		(1U << 0)
482 #define PERF_FLAG_FD_OUTPUT		(1U << 1)
483 #define PERF_FLAG_PID_CGROUP		(1U << 2) /* pid=cgroup id, per-cpu mode only */
484 
485 #ifdef __KERNEL__
486 /*
487  * Kernel-internal data types and definitions:
488  */
489 
490 #ifdef CONFIG_PERF_EVENTS
491 # include <linux/cgroup.h>
492 # include <asm/perf_event.h>
493 # include <asm/local64.h>
494 #endif
495 
496 struct perf_guest_info_callbacks {
497 	int				(*is_in_guest)(void);
498 	int				(*is_user_mode)(void);
499 	unsigned long			(*get_guest_ip)(void);
500 };
501 
502 #ifdef CONFIG_HAVE_HW_BREAKPOINT
503 #include <asm/hw_breakpoint.h>
504 #endif
505 
506 #include <linux/list.h>
507 #include <linux/mutex.h>
508 #include <linux/rculist.h>
509 #include <linux/rcupdate.h>
510 #include <linux/spinlock.h>
511 #include <linux/hrtimer.h>
512 #include <linux/fs.h>
513 #include <linux/pid_namespace.h>
514 #include <linux/workqueue.h>
515 #include <linux/ftrace.h>
516 #include <linux/cpu.h>
517 #include <linux/irq_work.h>
518 #include <linux/jump_label.h>
519 #include <asm/atomic.h>
520 #include <asm/local.h>
521 
522 #define PERF_MAX_STACK_DEPTH		255
523 
524 struct perf_callchain_entry {
525 	__u64				nr;
526 	__u64				ip[PERF_MAX_STACK_DEPTH];
527 };
528 
529 struct perf_raw_record {
530 	u32				size;
531 	void				*data;
532 };
533 
534 struct perf_branch_entry {
535 	__u64				from;
536 	__u64				to;
537 	__u64				flags;
538 };
539 
540 struct perf_branch_stack {
541 	__u64				nr;
542 	struct perf_branch_entry	entries[0];
543 };
544 
545 struct task_struct;
546 
547 /**
548  * struct hw_perf_event - performance event hardware details:
549  */
550 struct hw_perf_event {
551 #ifdef CONFIG_PERF_EVENTS
552 	union {
553 		struct { /* hardware */
554 			u64		config;
555 			u64		last_tag;
556 			unsigned long	config_base;
557 			unsigned long	event_base;
558 			int		idx;
559 			int		last_cpu;
560 			unsigned int	extra_reg;
561 			u64		extra_config;
562 			int		extra_alloc;
563 		};
564 		struct { /* software */
565 			struct hrtimer	hrtimer;
566 		};
567 #ifdef CONFIG_HAVE_HW_BREAKPOINT
568 		struct { /* breakpoint */
569 			struct arch_hw_breakpoint	info;
570 			struct list_head		bp_list;
571 			/*
572 			 * Crufty hack to avoid the chicken and egg
573 			 * problem hw_breakpoint has with context
574 			 * creation and event initialization.
575 			 */
576 			struct task_struct		*bp_target;
577 		};
578 #endif
579 	};
580 	int				state;
581 	local64_t			prev_count;
582 	u64				sample_period;
583 	u64				last_period;
584 	local64_t			period_left;
585 	u64				interrupts;
586 
587 	u64				freq_time_stamp;
588 	u64				freq_count_stamp;
589 #endif
590 };
591 
592 /*
593  * hw_perf_event::state flags
594  */
595 #define PERF_HES_STOPPED	0x01 /* the counter is stopped */
596 #define PERF_HES_UPTODATE	0x02 /* event->count up-to-date */
597 #define PERF_HES_ARCH		0x04
598 
599 struct perf_event;
600 
601 /*
602  * Common implementation detail of pmu::{start,commit,cancel}_txn
603  */
604 #define PERF_EVENT_TXN 0x1
605 
606 /**
607  * struct pmu - generic performance monitoring unit
608  */
609 struct pmu {
610 	struct list_head		entry;
611 
612 	struct device			*dev;
613 	char				*name;
614 	int				type;
615 
616 	int * __percpu			pmu_disable_count;
617 	struct perf_cpu_context * __percpu pmu_cpu_context;
618 	int				task_ctx_nr;
619 
620 	/*
621 	 * Fully disable/enable this PMU, can be used to protect from the PMI
622 	 * as well as for lazy/batch writing of the MSRs.
623 	 */
624 	void (*pmu_enable)		(struct pmu *pmu); /* optional */
625 	void (*pmu_disable)		(struct pmu *pmu); /* optional */
626 
627 	/*
628 	 * Try and initialize the event for this PMU.
629 	 * Should return -ENOENT when the @event doesn't match this PMU.
630 	 */
631 	int (*event_init)		(struct perf_event *event);
632 
633 #define PERF_EF_START	0x01		/* start the counter when adding    */
634 #define PERF_EF_RELOAD	0x02		/* reload the counter when starting */
635 #define PERF_EF_UPDATE	0x04		/* update the counter when stopping */
636 
637 	/*
638 	 * Adds/Removes a counter to/from the PMU, can be done inside
639 	 * a transaction, see the ->*_txn() methods.
640 	 */
641 	int  (*add)			(struct perf_event *event, int flags);
642 	void (*del)			(struct perf_event *event, int flags);
643 
644 	/*
645 	 * Starts/Stops a counter present on the PMU. The PMI handler
646 	 * should stop the counter when perf_event_overflow() returns
647 	 * !0. ->start() will be used to continue.
648 	 */
649 	void (*start)			(struct perf_event *event, int flags);
650 	void (*stop)			(struct perf_event *event, int flags);
651 
652 	/*
653 	 * Updates the counter value of the event.
654 	 */
655 	void (*read)			(struct perf_event *event);
656 
657 	/*
658 	 * Group events scheduling is treated as a transaction, add
659 	 * group events as a whole and perform one schedulability test.
660 	 * If the test fails, roll back the whole group
661 	 *
662 	 * Start the transaction, after this ->add() doesn't need to
663 	 * do schedulability tests.
664 	 */
665 	void (*start_txn)		(struct pmu *pmu); /* optional */
666 	/*
667 	 * If ->start_txn() disabled the ->add() schedulability test
668 	 * then ->commit_txn() is required to perform one. On success
669 	 * the transaction is closed. On error the transaction is kept
670 	 * open until ->cancel_txn() is called.
671 	 */
672 	int  (*commit_txn)		(struct pmu *pmu); /* optional */
673 	/*
674 	 * Will cancel the transaction, assumes ->del() is called
675 	 * for each successful ->add() during the transaction.
676 	 */
677 	void (*cancel_txn)		(struct pmu *pmu); /* optional */
678 };
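
/*
 * Illustrative kernel-side sketch (not part of this header): the minimal
 * shape of a struct pmu implementation and how the callbacks above relate.
 * The demo_* names are hypothetical, the actual counter programming is
 * elided, and passing a negative type to perf_pmu_register() is the usual
 * way to request a dynamically allocated attr.type. This shows the callback
 * contract only, not a working driver. Guarded out so it is never compiled.
 */
#if 0
static struct pmu demo_pmu;

static void demo_start(struct perf_event *event, int flags)
{
	/* (re)program and unhalt the hardware counter */
}

static void demo_stop(struct perf_event *event, int flags)
{
	/* halt the counter; on PERF_EF_UPDATE, fold its value into the event */
}

static int demo_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		demo_start(event, PERF_EF_RELOAD);
	return 0;
}

static void demo_del(struct perf_event *event, int flags)
{
	demo_stop(event, PERF_EF_UPDATE);
}

static void demo_read(struct perf_event *event)
{
	/* sync the current hardware value into the event's count */
}

static int demo_event_init(struct perf_event *event)
{
	/* Claim only events aimed at this PMU; let others fall through. */
	if (event->attr.type != demo_pmu.type)
		return -ENOENT;
	return 0;
}

static struct pmu demo_pmu = {
	.event_init	= demo_event_init,
	.add		= demo_add,
	.del		= demo_del,
	.start		= demo_start,
	.stop		= demo_stop,
	.read		= demo_read,
};

/* from the driver's init path: perf_pmu_register(&demo_pmu, "demo", -1); */
#endif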
679 
680 /**
681  * enum perf_event_active_state - the states of an event
682  */
683 enum perf_event_active_state {
684 	PERF_EVENT_STATE_ERROR		= -2,
685 	PERF_EVENT_STATE_OFF		= -1,
686 	PERF_EVENT_STATE_INACTIVE	=  0,
687 	PERF_EVENT_STATE_ACTIVE		=  1,
688 };
689 
690 struct file;
691 
692 #define PERF_BUFFER_WRITABLE		0x01
693 
694 struct perf_buffer {
695 	atomic_t			refcount;
696 	struct rcu_head			rcu_head;
697 #ifdef CONFIG_PERF_USE_VMALLOC
698 	struct work_struct		work;
699 	int				page_order;	/* allocation order  */
700 #endif
701 	int				nr_pages;	/* nr of data pages  */
702 	int				writable;	/* are we writable   */
703 
704 	atomic_t			poll;		/* POLL_ for wakeups */
705 
706 	local_t				head;		/* write position    */
707 	local_t				nest;		/* nested writers    */
708 	local_t				events;		/* event limit       */
709 	local_t				wakeup;		/* wakeup stamp      */
710 	local_t				lost;		/* nr records lost   */
711 
712 	long				watermark;	/* wakeup watermark  */
713 
714 	struct perf_event_mmap_page	*user_page;
715 	void				*data_pages[0];
716 };
717 
718 struct perf_sample_data;
719 
720 typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
721 					struct perf_sample_data *,
722 					struct pt_regs *regs);
723 
724 enum perf_group_flag {
725 	PERF_GROUP_SOFTWARE		= 0x1,
726 };
727 
728 #define SWEVENT_HLIST_BITS		8
729 #define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)
730 
731 struct swevent_hlist {
732 	struct hlist_head		heads[SWEVENT_HLIST_SIZE];
733 	struct rcu_head			rcu_head;
734 };
735 
736 #define PERF_ATTACH_CONTEXT	0x01
737 #define PERF_ATTACH_GROUP	0x02
738 #define PERF_ATTACH_TASK	0x04
739 
740 #ifdef CONFIG_CGROUP_PERF
741 /*
742  * perf_cgroup_info keeps track of time_enabled for a cgroup.
743  * This is a per-cpu dynamically allocated data structure.
744  */
745 struct perf_cgroup_info {
746 	u64				time;
747 	u64				timestamp;
748 };
749 
750 struct perf_cgroup {
751 	struct				cgroup_subsys_state css;
752 	struct				perf_cgroup_info *info;	/* timing info, one per cpu */
753 };
754 #endif
755 
756 /**
757  * struct perf_event - performance event kernel representation:
758  */
759 struct perf_event {
760 #ifdef CONFIG_PERF_EVENTS
761 	struct list_head		group_entry;
762 	struct list_head		event_entry;
763 	struct list_head		sibling_list;
764 	struct hlist_node		hlist_entry;
765 	int				nr_siblings;
766 	int				group_flags;
767 	struct perf_event		*group_leader;
768 	struct pmu			*pmu;
769 
770 	enum perf_event_active_state	state;
771 	unsigned int			attach_state;
772 	local64_t			count;
773 	atomic64_t			child_count;
774 
775 	/*
776 	 * These are the total time in nanoseconds that the event
777 	 * has been enabled (i.e. eligible to run, and the task has
778 	 * been scheduled in, if this is a per-task event)
779 	 * and running (scheduled onto the CPU), respectively.
780 	 *
781 	 * They are computed from tstamp_enabled, tstamp_running and
782 	 * tstamp_stopped when the event is in INACTIVE or ACTIVE state.
783 	 */
784 	u64				total_time_enabled;
785 	u64				total_time_running;
786 
787 	/*
788 	 * These are timestamps used for computing total_time_enabled
789 	 * and total_time_running when the event is in INACTIVE or
790 	 * ACTIVE state, measured in nanoseconds from an arbitrary point
791 	 * in time.
792 	 * tstamp_enabled: the notional time when the event was enabled
793 	 * tstamp_running: the notional time when the event was scheduled on
794 	 * tstamp_stopped: in INACTIVE state, the notional time when the
795 	 *	event was scheduled off.
796 	 */
797 	u64				tstamp_enabled;
798 	u64				tstamp_running;
799 	u64				tstamp_stopped;
800 
801 	/*
802 	 * timestamp shadows the actual context timing but it can
803 	 * be safely used in NMI interrupt context. It reflects the
804 	 * context time as it was when the event was last scheduled in.
805 	 *
806 	 * ctx_time already accounts for ctx->timestamp. Therefore to
807 	 * compute ctx_time for a sample, simply add perf_clock().
808 	 */
809 	u64				shadow_ctx_time;
810 
811 	struct perf_event_attr		attr;
812 	u16				header_size;
813 	u16				id_header_size;
814 	u16				read_size;
815 	struct hw_perf_event		hw;
816 
817 	struct perf_event_context	*ctx;
818 	struct file			*filp;
819 
820 	/*
821 	 * These accumulate total time (in nanoseconds) that children
822 	 * events have been enabled and running, respectively.
823 	 */
824 	atomic64_t			child_total_time_enabled;
825 	atomic64_t			child_total_time_running;
826 
827 	/*
828 	 * Protect attach/detach and child_list:
829 	 */
830 	struct mutex			child_mutex;
831 	struct list_head		child_list;
832 	struct perf_event		*parent;
833 
834 	int				oncpu;
835 	int				cpu;
836 
837 	struct list_head		owner_entry;
838 	struct task_struct		*owner;
839 
840 	/* mmap bits */
841 	struct mutex			mmap_mutex;
842 	atomic_t			mmap_count;
843 	int				mmap_locked;
844 	struct user_struct		*mmap_user;
845 	struct perf_buffer		*buffer;
846 
847 	/* poll related */
848 	wait_queue_head_t		waitq;
849 	struct fasync_struct		*fasync;
850 
851 	/* delayed work for NMIs and such */
852 	int				pending_wakeup;
853 	int				pending_kill;
854 	int				pending_disable;
855 	struct irq_work			pending;
856 
857 	atomic_t			event_limit;
858 
859 	void (*destroy)(struct perf_event *);
860 	struct rcu_head			rcu_head;
861 
862 	struct pid_namespace		*ns;
863 	u64				id;
864 
865 	perf_overflow_handler_t		overflow_handler;
866 
867 #ifdef CONFIG_EVENT_TRACING
868 	struct ftrace_event_call	*tp_event;
869 	struct event_filter		*filter;
870 #endif
871 
872 #ifdef CONFIG_CGROUP_PERF
873 	struct perf_cgroup		*cgrp; /* cgroup event is attach to */
874 	int				cgrp_defer_enabled;
875 #endif
876 
877 #endif /* CONFIG_PERF_EVENTS */
878 };
879 
880 enum perf_event_context_type {
881 	task_context,
882 	cpu_context,
883 };
884 
885 /**
886  * struct perf_event_context - event context structure
887  *
888  * Used as a container for task events and CPU events as well:
889  */
890 struct perf_event_context {
891 	struct pmu			*pmu;
892 	enum perf_event_context_type	type;
893 	/*
894 	 * Protect the states of the events in the list,
895 	 * nr_active, and the list:
896 	 */
897 	raw_spinlock_t			lock;
898 	/*
899 	 * Protect the list of events.  Locking either mutex or lock
900 	 * is sufficient to ensure the list doesn't change; to change
901 	 * the list you need to lock both the mutex and the spinlock.
902 	 */
903 	struct mutex			mutex;
904 
905 	struct list_head		pinned_groups;
906 	struct list_head		flexible_groups;
907 	struct list_head		event_list;
908 	int				nr_events;
909 	int				nr_active;
910 	int				is_active;
911 	int				nr_stat;
912 	int				rotate_disable;
913 	atomic_t			refcount;
914 	struct task_struct		*task;
915 
916 	/*
917 	 * Context clock, runs when context enabled.
918 	 */
919 	u64				time;
920 	u64				timestamp;
921 
922 	/*
923 	 * These fields let us detect when two contexts have both
924 	 * been cloned (inherited) from a common ancestor.
925 	 */
926 	struct perf_event_context	*parent_ctx;
927 	u64				parent_gen;
928 	u64				generation;
929 	int				pin_count;
930 	struct rcu_head			rcu_head;
931 	int				nr_cgroups; /* cgroup events present */
932 };
933 
934 /*
935  * Number of contexts where an event can trigger:
936  *	task, softirq, hardirq, nmi.
937  */
938 #define PERF_NR_CONTEXTS	4
939 
940 /**
941  * struct perf_event_cpu_context - per cpu event context structure
942  */
943 struct perf_cpu_context {
944 	struct perf_event_context	ctx;
945 	struct perf_event_context	*task_ctx;
946 	int				active_oncpu;
947 	int				exclusive;
948 	struct list_head		rotation_list;
949 	int				jiffies_interval;
950 	struct pmu			*active_pmu;
951 	struct perf_cgroup		*cgrp;
952 };
953 
954 struct perf_output_handle {
955 	struct perf_event		*event;
956 	struct perf_buffer		*buffer;
957 	unsigned long			wakeup;
958 	unsigned long			size;
959 	void				*addr;
960 	int				page;
961 	int				nmi;
962 	int				sample;
963 };
964 
965 #ifdef CONFIG_PERF_EVENTS
966 
967 extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
968 extern void perf_pmu_unregister(struct pmu *pmu);
969 
970 extern int perf_num_counters(void);
971 extern const char *perf_pmu_name(void);
972 extern void __perf_event_task_sched_in(struct task_struct *task);
973 extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
974 extern int perf_event_init_task(struct task_struct *child);
975 extern void perf_event_exit_task(struct task_struct *child);
976 extern void perf_event_free_task(struct task_struct *task);
977 extern void perf_event_delayed_put(struct task_struct *task);
978 extern void perf_event_print_debug(void);
979 extern void perf_pmu_disable(struct pmu *pmu);
980 extern void perf_pmu_enable(struct pmu *pmu);
981 extern int perf_event_task_disable(void);
982 extern int perf_event_task_enable(void);
983 extern void perf_event_update_userpage(struct perf_event *event);
984 extern int perf_event_release_kernel(struct perf_event *event);
985 extern struct perf_event *
986 perf_event_create_kernel_counter(struct perf_event_attr *attr,
987 				int cpu,
988 				struct task_struct *task,
989 				perf_overflow_handler_t callback);
990 extern u64 perf_event_read_value(struct perf_event *event,
991 				 u64 *enabled, u64 *running);
992 
993 struct perf_sample_data {
994 	u64				type;
995 
996 	u64				ip;
997 	struct {
998 		u32	pid;
999 		u32	tid;
1000 	}				tid_entry;
1001 	u64				time;
1002 	u64				addr;
1003 	u64				id;
1004 	u64				stream_id;
1005 	struct {
1006 		u32	cpu;
1007 		u32	reserved;
1008 	}				cpu_entry;
1009 	u64				period;
1010 	struct perf_callchain_entry	*callchain;
1011 	struct perf_raw_record		*raw;
1012 };
1013 
1014 static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
1015 {
1016 	data->addr = addr;
1017 	data->raw  = NULL;
1018 }
1019 
1020 extern void perf_output_sample(struct perf_output_handle *handle,
1021 			       struct perf_event_header *header,
1022 			       struct perf_sample_data *data,
1023 			       struct perf_event *event);
1024 extern void perf_prepare_sample(struct perf_event_header *header,
1025 				struct perf_sample_data *data,
1026 				struct perf_event *event,
1027 				struct pt_regs *regs);
1028 
1029 extern int perf_event_overflow(struct perf_event *event, int nmi,
1030 				 struct perf_sample_data *data,
1031 				 struct pt_regs *regs);
1032 
1033 static inline bool is_sampling_event(struct perf_event *event)
1034 {
1035 	return event->attr.sample_period != 0;
1036 }
1037 
1038 /*
1039  * Return 1 for a software event, 0 for a hardware event
1040  */
1041 static inline int is_software_event(struct perf_event *event)
1042 {
1043 	return event->pmu->task_ctx_nr == perf_sw_context;
1044 }
1045 
1046 extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
1047 
1048 extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
1049 
1050 #ifndef perf_arch_fetch_caller_regs
1051 static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
1052 #endif
1053 
1054 /*
1055  * Take a snapshot of the regs. Skip ip and frame pointer to
1056  * the nth caller. We only need a few of the regs:
1057  * - ip for PERF_SAMPLE_IP
1058  * - cs for user_mode() tests
1059  * - bp for callchains
1060  * - eflags, for future purposes, just in case
1061  */
1062 static inline void perf_fetch_caller_regs(struct pt_regs *regs)
1063 {
1064 	memset(regs, 0, sizeof(*regs));
1065 
1066 	perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
1067 }
1068 
1069 static __always_inline void
1070 perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
1071 {
1072 	struct pt_regs hot_regs;
1073 
1074 	if (static_branch(&perf_swevent_enabled[event_id])) {
1075 		if (!regs) {
1076 			perf_fetch_caller_regs(&hot_regs);
1077 			regs = &hot_regs;
1078 		}
1079 		__perf_sw_event(event_id, nr, nmi, regs, addr);
1080 	}
1081 }
1082 
1083 extern struct jump_label_key perf_sched_events;
1084 
1085 static inline void perf_event_task_sched_in(struct task_struct *task)
1086 {
1087 	if (static_branch(&perf_sched_events))
1088 		__perf_event_task_sched_in(task);
1089 }
1090 
1091 static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
1092 {
1093 	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
1094 
1095 	__perf_event_task_sched_out(task, next);
1096 }
1097 
1098 extern void perf_event_mmap(struct vm_area_struct *vma);
1099 extern struct perf_guest_info_callbacks *perf_guest_cbs;
1100 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
1101 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
1102 
1103 extern void perf_event_comm(struct task_struct *tsk);
1104 extern void perf_event_fork(struct task_struct *tsk);
1105 
1106 /* Callchains */
1107 DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
1108 
1109 extern void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs);
1110 extern void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs);
1111 
1112 static inline void perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
1113 {
1114 	if (entry->nr < PERF_MAX_STACK_DEPTH)
1115 		entry->ip[entry->nr++] = ip;
1116 }
1117 
1118 extern int sysctl_perf_event_paranoid;
1119 extern int sysctl_perf_event_mlock;
1120 extern int sysctl_perf_event_sample_rate;
1121 
1122 extern int perf_proc_update_handler(struct ctl_table *table, int write,
1123 		void __user *buffer, size_t *lenp,
1124 		loff_t *ppos);
1125 
1126 static inline bool perf_paranoid_tracepoint_raw(void)
1127 {
1128 	return sysctl_perf_event_paranoid > -1;
1129 }
1130 
1131 static inline bool perf_paranoid_cpu(void)
1132 {
1133 	return sysctl_perf_event_paranoid > 0;
1134 }
1135 
1136 static inline bool perf_paranoid_kernel(void)
1137 {
1138 	return sysctl_perf_event_paranoid > 1;
1139 }
1140 
1141 extern void perf_event_init(void);
1142 extern void perf_tp_event(u64 addr, u64 count, void *record,
1143 			  int entry_size, struct pt_regs *regs,
1144 			  struct hlist_head *head, int rctx);
1145 extern void perf_bp_event(struct perf_event *event, void *data);
1146 
1147 #ifndef perf_misc_flags
1148 # define perf_misc_flags(regs) \
1149 		(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
1150 # define perf_instruction_pointer(regs)	instruction_pointer(regs)
1151 #endif
1152 
1153 extern int perf_output_begin(struct perf_output_handle *handle,
1154 			     struct perf_event *event, unsigned int size,
1155 			     int nmi, int sample);
1156 extern void perf_output_end(struct perf_output_handle *handle);
1157 extern void perf_output_copy(struct perf_output_handle *handle,
1158 			     const void *buf, unsigned int len);
1159 extern int perf_swevent_get_recursion_context(void);
1160 extern void perf_swevent_put_recursion_context(int rctx);
1161 extern void perf_event_enable(struct perf_event *event);
1162 extern void perf_event_disable(struct perf_event *event);
1163 extern void perf_event_task_tick(void);
1164 #else
1165 static inline void
1166 perf_event_task_sched_in(struct task_struct *task)			{ }
1167 static inline void
1168 perf_event_task_sched_out(struct task_struct *task,
1169 			    struct task_struct *next)			{ }
1170 static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
1171 static inline void perf_event_exit_task(struct task_struct *child)	{ }
1172 static inline void perf_event_free_task(struct task_struct *task)	{ }
1173 static inline void perf_event_delayed_put(struct task_struct *task)	{ }
1174 static inline void perf_event_print_debug(void)				{ }
1175 static inline int perf_event_task_disable(void)				{ return -EINVAL; }
1176 static inline int perf_event_task_enable(void)				{ return -EINVAL; }
1177 
1178 static inline void
1179 perf_sw_event(u32 event_id, u64 nr, int nmi,
1180 		     struct pt_regs *regs, u64 addr)			{ }
1181 static inline void
1182 perf_bp_event(struct perf_event *event, void *data)			{ }
1183 
1184 static inline int perf_register_guest_info_callbacks
1185 (struct perf_guest_info_callbacks *callbacks)				{ return 0; }
1186 static inline int perf_unregister_guest_info_callbacks
1187 (struct perf_guest_info_callbacks *callbacks)				{ return 0; }
1188 
1189 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
1190 static inline void perf_event_comm(struct task_struct *tsk)		{ }
1191 static inline void perf_event_fork(struct task_struct *tsk)		{ }
1192 static inline void perf_event_init(void)				{ }
1193 static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
1194 static inline void perf_swevent_put_recursion_context(int rctx)		{ }
1195 static inline void perf_event_enable(struct perf_event *event)		{ }
1196 static inline void perf_event_disable(struct perf_event *event)		{ }
1197 static inline void perf_event_task_tick(void)				{ }
1198 #endif
1199 
1200 #define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
1201 
1202 /*
1203  * This has to have a higher priority than migration_notifier in sched.c.
1204  */
1205 #define perf_cpu_notifier(fn)						\
1206 do {									\
1207 	static struct notifier_block fn##_nb __cpuinitdata =		\
1208 		{ .notifier_call = fn, .priority = CPU_PRI_PERF };	\
1209 	fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,			\
1210 		(void *)(unsigned long)smp_processor_id());		\
1211 	fn(&fn##_nb, (unsigned long)CPU_STARTING,			\
1212 		(void *)(unsigned long)smp_processor_id());		\
1213 	fn(&fn##_nb, (unsigned long)CPU_ONLINE,				\
1214 		(void *)(unsigned long)smp_processor_id());		\
1215 	register_cpu_notifier(&fn##_nb);				\
1216 } while (0)
1217 
1218 #endif /* __KERNEL__ */
1219 #endif /* _LINUX_PERF_EVENT_H */
1220