1 /*
2 * Performance events x86 architecture header
3 *
4 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6 * Copyright (C) 2009 Jaswinder Singh Rajput
7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
9 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
10 * Copyright (C) 2009 Google, Inc., Stephane Eranian
11 *
12 * For licencing details see kernel-base/COPYING
13 */
14
15 #include <linux/perf_event.h>
16
17 #include <asm/intel_ds.h>
18 #include <asm/cpu.h>
19
20 /* To enable MSR tracing please use the generic trace points. */
21
/*
 *          |    NHM/WSM    |      SNB      |
 * register -------------------------------
 *          |  HT  | no HT  |  HT  | no HT  |
 *-----------------------------------------
 * offcore  | core | core   | cpu  | core   |
 * lbr_sel  | core | core   | cpu  | core   |
 * ld_lat   | cpu  | core   | cpu  | core   |
 *-----------------------------------------
 *
 * Given that there is a small number of shared regs,
 * we can pre-allocate their slot in the per-cpu
 * per-core reg tables.
 */
/* Slot of each shared extra register in a regs[EXTRA_REG_MAX] table. */
enum extra_reg_type {
	EXTRA_REG_NONE	= -1,	/* no extra register in use */

	EXTRA_REG_RSP_0	= 0,	/* offcore_response_0 */
	EXTRA_REG_RSP_1	= 1,	/* offcore_response_1 */
	EXTRA_REG_LBR	= 2,	/* lbr_select */
	EXTRA_REG_LDLAT	= 3,	/* ld_lat_threshold */
	EXTRA_REG_FE	= 4,	/* fe_* */

	EXTRA_REG_MAX		/* count of slots; keep last */
};
47
48 struct event_constraint {
49 union {
50 unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
51 u64 idxmsk64;
52 };
53 u64 code;
54 u64 cmask;
55 int weight;
56 int overlap;
57 int flags;
58 unsigned int size;
59 };
60
constraint_match(struct event_constraint * c,u64 ecode)61 static inline bool constraint_match(struct event_constraint *c, u64 ecode)
62 {
63 return ((ecode & c->cmask) - c->code) <= (u64)c->size;
64 }
65
/*
 * Bits stored in struct hw_perf_event.flags
 */
#define PERF_X86_EVENT_PEBS_LDLAT	0x0001	/* ld+ldlat data address sampling */
#define PERF_X86_EVENT_PEBS_ST		0x0002	/* st data address sampling */
#define PERF_X86_EVENT_PEBS_ST_HSW	0x0004	/* Haswell-style datala, store */
#define PERF_X86_EVENT_PEBS_LD_HSW	0x0008	/* Haswell-style datala, load */
#define PERF_X86_EVENT_PEBS_NA_HSW	0x0010	/* Haswell-style datala, unknown */
#define PERF_X86_EVENT_EXCL		0x0020	/* HT exclusivity on counter */
#define PERF_X86_EVENT_DYNAMIC		0x0040	/* dynamically allocated constraint */
#define PERF_X86_EVENT_RDPMC_ALLOWED	0x0080	/* grant rdpmc permission */
#define PERF_X86_EVENT_EXCL_ACCT	0x0100	/* accounted EXCL event */
#define PERF_X86_EVENT_AUTO_RELOAD	0x0200	/* use PEBS auto-reload */
#define PERF_X86_EVENT_LARGE_PEBS	0x0400	/* use large PEBS */
#define PERF_X86_EVENT_PEBS_VIA_PT	0x0800	/* use PT buffer for PEBS */
#define PERF_X86_EVENT_PAIR		0x1000	/* Large Increment per Cycle */
#define PERF_X86_EVENT_LBR_SELECT	0x2000	/* save/restore MSR_LBR_SELECT */
#define PERF_X86_EVENT_TOPDOWN		0x4000	/* counts Topdown slots/metrics events */
#define PERF_X86_EVENT_PEBS_STLAT	0x8000	/* st+stlat data address sampling */
85
is_topdown_count(struct perf_event * event)86 static inline bool is_topdown_count(struct perf_event *event)
87 {
88 return event->hw.flags & PERF_X86_EVENT_TOPDOWN;
89 }
90
is_metric_event(struct perf_event * event)91 static inline bool is_metric_event(struct perf_event *event)
92 {
93 u64 config = event->attr.config;
94
95 return ((config & ARCH_PERFMON_EVENTSEL_EVENT) == 0) &&
96 ((config & INTEL_ARCH_EVENT_MASK) >= INTEL_TD_METRIC_RETIRING) &&
97 ((config & INTEL_ARCH_EVENT_MASK) <= INTEL_TD_METRIC_MAX);
98 }
99
is_slots_event(struct perf_event * event)100 static inline bool is_slots_event(struct perf_event *event)
101 {
102 return (event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_TD_SLOTS;
103 }
104
is_topdown_event(struct perf_event * event)105 static inline bool is_topdown_event(struct perf_event *event)
106 {
107 return is_metric_event(event) || is_slots_event(event);
108 }
109
110 struct amd_nb {
111 int nb_id; /* NorthBridge id */
112 int refcnt; /* reference count */
113 struct perf_event *owners[X86_PMC_IDX_MAX];
114 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
115 };
116
/* Low MAX_PEBS_EVENTS bits: one per PEBS-capable counter. */
#define PEBS_COUNTER_MASK		((1ULL << MAX_PEBS_EVENTS) - 1)
#define PEBS_PMI_AFTER_EACH_RECORD	BIT_ULL(60)
/* Two-bit output selector starting at bit 61. */
#define PEBS_OUTPUT_OFFSET		61
#define PEBS_OUTPUT_MASK		(3ULL << PEBS_OUTPUT_OFFSET)
#define PEBS_OUTPUT_PT			(1ULL << PEBS_OUTPUT_OFFSET)
#define PEBS_VIA_PT_MASK		(PEBS_OUTPUT_PT | PEBS_PMI_AFTER_EACH_RECORD)
123
/*
 * Sample flags that PEBS can handle without a PMI.
 *
 * TID can only be handled by flushing at context switch.
 * REGS_USER can be handled for events limited to ring 3.
 */
#define LARGE_PEBS_FLAGS			\
	(PERF_SAMPLE_IP			|	\
	 PERF_SAMPLE_TID		|	\
	 PERF_SAMPLE_ADDR		|	\
	 PERF_SAMPLE_ID			|	\
	 PERF_SAMPLE_CPU		|	\
	 PERF_SAMPLE_STREAM_ID		|	\
	 PERF_SAMPLE_DATA_SRC		|	\
	 PERF_SAMPLE_IDENTIFIER		|	\
	 PERF_SAMPLE_TRANSACTION	|	\
	 PERF_SAMPLE_PHYS_ADDR		|	\
	 PERF_SAMPLE_REGS_INTR		|	\
	 PERF_SAMPLE_REGS_USER		|	\
	 PERF_SAMPLE_PERIOD		|	\
	 PERF_SAMPLE_CODE_PAGE_SIZE)
138
/* Bitmask of PERF_REG_X86_* indexes: AX..FLAGS plus R8..R15. */
#define PEBS_GP_REGS							\
	((1ULL << PERF_REG_X86_AX)    | (1ULL << PERF_REG_X86_BX)  |	\
	 (1ULL << PERF_REG_X86_CX)    | (1ULL << PERF_REG_X86_DX)  |	\
	 (1ULL << PERF_REG_X86_DI)    | (1ULL << PERF_REG_X86_SI)  |	\
	 (1ULL << PERF_REG_X86_SP)    | (1ULL << PERF_REG_X86_BP)  |	\
	 (1ULL << PERF_REG_X86_IP)    |					\
	 (1ULL << PERF_REG_X86_FLAGS) |					\
	 (1ULL << PERF_REG_X86_R8)    | (1ULL << PERF_REG_X86_R9)  |	\
	 (1ULL << PERF_REG_X86_R10)   | (1ULL << PERF_REG_X86_R11) |	\
	 (1ULL << PERF_REG_X86_R12)   | (1ULL << PERF_REG_X86_R13) |	\
	 (1ULL << PERF_REG_X86_R14)   | (1ULL << PERF_REG_X86_R15))
158
159 /*
160 * Per register state.
161 */
162 struct er_account {
163 raw_spinlock_t lock; /* per-core: protect structure */
164 u64 config; /* extra MSR config */
165 u64 reg; /* extra MSR number */
166 atomic_t ref; /* reference count */
167 };
168
169 /*
170 * Per core/cpu state
171 *
172 * Used to coordinate shared registers between HT threads or
173 * among events on a single PMU.
174 */
175 struct intel_shared_regs {
176 struct er_account regs[EXTRA_REG_MAX];
177 int refcnt; /* per-core: #HT threads */
178 unsigned core_id; /* per-core: core id */
179 };
180
/* Per-counter exclusivity state tracked in struct intel_excl_states. */
enum intel_excl_state_type {
	INTEL_EXCL_UNUSED	= 0,	/* counter is unused */
	INTEL_EXCL_SHARED	= 1,	/* both threads may use the counter */
	INTEL_EXCL_EXCLUSIVE	= 2,	/* only one thread may use the counter */
};
186
187 struct intel_excl_states {
188 enum intel_excl_state_type state[X86_PMC_IDX_MAX];
189 bool sched_started; /* true if scheduling has started */
190 };
191
192 struct intel_excl_cntrs {
193 raw_spinlock_t lock;
194
195 struct intel_excl_states states[2];
196
197 union {
198 u16 has_exclusive[2];
199 u32 exclusive_present;
200 };
201
202 int refcnt; /* per-core: #HT threads */
203 unsigned core_id; /* per-core: core id */
204 };
205
/* Defined further down; only the tag is needed here. */
struct x86_perf_task_context;

/* Capacity of the fixed-size LBR entry arrays in this header. */
#define MAX_LBR_ENTRIES		32
208
/* LBR record format identifiers. */
enum {
	LBR_FORMAT_32		= 0x00,
	LBR_FORMAT_LIP		= 0x01,
	LBR_FORMAT_EIP		= 0x02,
	LBR_FORMAT_EIP_FLAGS	= 0x03,
	LBR_FORMAT_EIP_FLAGS2	= 0x04,
	LBR_FORMAT_INFO		= 0x05,
	LBR_FORMAT_TIME		= 0x06,
	/* Highest format value listed above. */
	LBR_FORMAT_MAX_KNOWN	= LBR_FORMAT_TIME,
};
219
/* Slots of cpu_hw_events::kfree_on_online[]. */
enum {
	X86_PERF_KFREE_SHARED	= 0,
	X86_PERF_KFREE_EXCL	= 1,
	X86_PERF_KFREE_MAX		/* number of slots; keep last */
};
225
226 struct cpu_hw_events {
227 /*
228 * Generic x86 PMC bits
229 */
230 struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
231 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
232 unsigned long dirty[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
233 int enabled;
234
235 int n_events; /* the # of events in the below arrays */
236 int n_added; /* the # last events in the below arrays;
237 they've never been enabled yet */
238 int n_txn; /* the # last events in the below arrays;
239 added in the current transaction */
240 int n_txn_pair;
241 int n_txn_metric;
242 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
243 u64 tags[X86_PMC_IDX_MAX];
244
245 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
246 struct event_constraint *event_constraint[X86_PMC_IDX_MAX];
247
248 int n_excl; /* the number of exclusive events */
249
250 unsigned int txn_flags;
251 int is_fake;
252
253 /*
254 * Intel DebugStore bits
255 */
256 struct debug_store *ds;
257 void *ds_pebs_vaddr;
258 void *ds_bts_vaddr;
259 u64 pebs_enabled;
260 int n_pebs;
261 int n_large_pebs;
262 int n_pebs_via_pt;
263 int pebs_output;
264
265 /* Current super set of events hardware configuration */
266 u64 pebs_data_cfg;
267 u64 active_pebs_data_cfg;
268 int pebs_record_size;
269
270 /*
271 * Intel LBR bits
272 */
273 int lbr_users;
274 int lbr_pebs_users;
275 struct perf_branch_stack lbr_stack;
276 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
277 union {
278 struct er_account *lbr_sel;
279 struct er_account *lbr_ctl;
280 };
281 u64 br_sel;
282 void *last_task_ctx;
283 int last_log_id;
284 int lbr_select;
285 void *lbr_xsave;
286
287 /*
288 * Intel host/guest exclude bits
289 */
290 u64 intel_ctrl_guest_mask;
291 u64 intel_ctrl_host_mask;
292 struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX];
293
294 /*
295 * Intel checkpoint mask
296 */
297 u64 intel_cp_status;
298
299 /*
300 * manage shared (per-core, per-cpu) registers
301 * used on Intel NHM/WSM/SNB
302 */
303 struct intel_shared_regs *shared_regs;
304 /*
305 * manage exclusive counter access between hyperthread
306 */
307 struct event_constraint *constraint_list; /* in enable order */
308 struct intel_excl_cntrs *excl_cntrs;
309 int excl_thread_id; /* 0 or 1 */
310
311 /*
312 * SKL TSX_FORCE_ABORT shadow
313 */
314 u64 tfa_shadow;
315
316 /*
317 * Perf Metrics
318 */
319 /* number of accepted metrics events */
320 int n_metric;
321
322 /*
323 * AMD specific bits
324 */
325 struct amd_nb *amd_nb;
326 /* Inverted mask of bits to clear in the perf_ctr ctrl registers */
327 u64 perf_ctr_virt_mask;
328 int n_pair; /* Large increment events */
329
330 void *kfree_on_online[X86_PERF_KFREE_MAX];
331
332 struct pmu *pmu;
333 };
334
/*
 * Initializer for a struct event_constraint covering event codes
 * c..e: n = counter bitmask, m = config compare mask, w = weight,
 * o = overlap, f = PERF_X86_EVENT_* flags.
 */
#define __EVENT_CONSTRAINT_RANGE(c, e, n, m, w, o, f) {	\
	{ .idxmsk64 = (n) },		\
	.code		= (c),		\
	.cmask		= (m),		\
	.weight		= (w),		\
	.overlap	= (o),		\
	.flags		= f,		\
	.size		= (e) - (c),	\
}

/* Single-code constraint: a range whose first and last code coincide. */
#define __EVENT_CONSTRAINT(c, n, m, w, o, f)		\
	__EVENT_CONSTRAINT_RANGE(c, c, n, m, w, o, f)

/* Plain constraint: weight from HWEIGHT(n), no overlap, no flags. */
#define EVENT_CONSTRAINT(c, n, m)			\
	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
350
/*
 * constraint_match() only works for 'simple' event codes, not for
 * extended (AMD64_EVENTSEL_EVENT) event codes.
 */
#define EVENT_CONSTRAINT_RANGE(c, e, n, m)		\
	__EVENT_CONSTRAINT_RANGE(c, e, n, m, HWEIGHT(n), 0, 0)

/* Event-code constraint that also sets PERF_X86_EVENT_EXCL. */
#define INTEL_EXCLEVT_CONSTRAINT(c, n)			\
	__EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT,	\
			   HWEIGHT(n), 0, PERF_X86_EVENT_EXCL)
361
362 /*
363 * The overlap flag marks event constraints with overlapping counter
364 * masks. This is the case if the counter mask of such an event is not
365 * a subset of any other counter mask of a constraint with an equal or
366 * higher weight, e.g.:
367 *
368 * c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
369 * c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
370 * c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
371 *
372 * The event scheduler may not select the correct counter in the first
373 * cycle because it needs to know which subsequent events will be
374 * scheduled. It may fail to schedule the events then. So we set the
375 * overlap flag for such constraints to give the scheduler a hint which
376 * events to select for counter rescheduling.
377 *
378 * Care must be taken as the rescheduling algorithm is O(n!) which
379 * will increase scheduling cycles for an over-committed system
380 * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
381 * and its counter masks must be kept at a minimum.
382 */
#define EVENT_CONSTRAINT_OVERLAP(c, n, m)		\
	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1 /* overlap */, 0)
385
/*
 * Constraint matched on the event code only.
 */
#define INTEL_EVENT_CONSTRAINT(c, n)			\
	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)

/*
 * Constraint matched on a range of event codes, c through e.
 */
#define INTEL_EVENT_CONSTRAINT_RANGE(c, e, n)		\
	EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT)
397
/*
 * Constraint on the Event code + UMask + fixed-mask
 *
 * filter mask to validate fixed counter events.
 * the following filters disqualify for fixed counters:
 *  - inv
 *  - edge
 *  - cnt-mask
 *  - in_tx
 *  - in_tx_checkpointed
 * The other filters are supported by fixed counters.
 * The any-thread option is supported starting with v3.
 *
 * @c: event code, @n: fixed counter number; the counter is placed at
 * bit (32 + n) of the index mask.  @n is parenthesized so that any
 * expression may be passed without operator-precedence surprises.
 */
#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
#define FIXED_EVENT_CONSTRAINT(c, n)	\
	EVENT_CONSTRAINT(c, (1ULL << (32 + (n))), FIXED_EVENT_FLAGS)
414
/*
 * The special metric counters do not actually exist. They are calculated from
 * the combination of the FxCtr3 + MSR_PERF_METRICS.
 *
 * The special metric counters are mapped to a dummy offset for the scheduler.
 * The sharing between multiple users of the same metric without multiplexing
 * is not allowed, even though the hardware supports that in principle.
 *
 * @c: event code, @n: metric number, placed at bit
 * (INTEL_PMC_IDX_METRIC_BASE + n) of the index mask.  @n is parenthesized
 * so that any expression may be passed safely.
 */

#define METRIC_EVENT_CONSTRAINT(c, n)					\
	EVENT_CONSTRAINT(c, (1ULL << (INTEL_PMC_IDX_METRIC_BASE + (n))),	\
			 INTEL_ARCH_EVENT_MASK)
427
/*
 * Constraint matched on event code + umask.
 */
#define INTEL_UEVENT_CONSTRAINT(c, n)			\
	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)

/* Constraint on the event code plus only the umask bits set in c. */
#define INTEL_UBIT_EVENT_CONSTRAINT(c, n)		\
	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|(c))

/* Like INTEL_UEVENT_CONSTRAINT, but the event flags must match too. */
#define INTEL_FLAGS_UEVENT_CONSTRAINT(c, n)		\
	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)

/* Event code + umask; sets PERF_X86_EVENT_EXCL. */
#define INTEL_EXCLUEVT_CONSTRAINT(c, n)			\
	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK,	\
			   HWEIGHT(n), 0, PERF_X86_EVENT_EXCL)

/* Event code + umask + flags; sets PERF_X86_EVENT_PEBS_LDLAT. */
#define INTEL_PLD_CONSTRAINT(c, n)			\
	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)

/* Event code + umask + flags; sets PERF_X86_EVENT_PEBS_STLAT. */
#define INTEL_PSD_CONSTRAINT(c, n)			\
	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_STLAT)

/* Event code + umask + flags; sets PERF_X86_EVENT_PEBS_ST. */
#define INTEL_PST_CONSTRAINT(c, n)			\
	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
457
/* Event-code constraint that matches on all event flags too. */
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n)		\
	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)

/* Same, for the range of event codes c through e. */
#define INTEL_FLAGS_EVENT_CONSTRAINT_RANGE(c, e, n)	\
	EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)

/* Check only the flags; any event/umask is allowed. */
#define INTEL_ALL_EVENT_CONSTRAINT(code, n)		\
	EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)

/* Check flags and event code; set the HSW store flag. */
#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_ST(code, n)	\
	__EVENT_CONSTRAINT(code, n,			\
			   ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)

/* Check flags and event code; set the HSW load flag. */
#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD(code, n)	\
	__EVENT_CONSTRAINT(code, n,			\
			   ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)

/* Range variant (code through end) of the HSW load constraint. */
#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(code, end, n) \
	__EVENT_CONSTRAINT_RANGE(code, end, n,		\
			   ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)

/* HSW load constraint that additionally sets PERF_X86_EVENT_EXCL. */
#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \
	__EVENT_CONSTRAINT(code, n,			\
			   ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
			   HWEIGHT(n), 0,		\
			   PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)

/* Check flags and event code/umask; set the HSW store flag. */
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(code, n) \
	__EVENT_CONSTRAINT(code, n,			\
			   INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)

/* HSW store constraint on code/umask that also sets PERF_X86_EVENT_EXCL. */
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XST(code, n) \
	__EVENT_CONSTRAINT(code, n,			\
			   INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			   HWEIGHT(n), 0,		\
			   PERF_X86_EVENT_PEBS_ST_HSW|PERF_X86_EVENT_EXCL)

/* Check flags and event code/umask; set the HSW load flag. */
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(code, n) \
	__EVENT_CONSTRAINT(code, n,			\
			   INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)

/* HSW load constraint on code/umask that also sets PERF_X86_EVENT_EXCL. */
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(code, n) \
	__EVENT_CONSTRAINT(code, n,			\
			   INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			   HWEIGHT(n), 0,		\
			   PERF_X86_EVENT_PEBS_LD_HSW|PERF_X86_EVENT_EXCL)

/* Check flags and event code/umask; set the HSW N/A flag. */
#define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
	__EVENT_CONSTRAINT(code, n,			\
			   INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
			   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW)

521
522
/*
 * The end marker of a constraint table carries a weight of -1.
 * A weight obtained by counting mask bits can never be negative, so
 * this leaves a zero mask (weight zero) free for blacklisting events.
 */
#define EVENT_CONSTRAINT_END	{ .weight = -1 }

/*
 * Walk table c with cursor e until the weight == -1 end marker.
 */
#define for_each_event_constraint(e, c)			\
	for ((e) = (c); (e)->weight != -1; (e)++)
537
/*
 * Extra registers for specific events.
 *
 * Some events need large masks and require external MSRs.
 * Those extra MSRs end up being shared by all events on
 * a PMU and sometimes between the PMUs of sibling HT threads.
 * In either case, the kernel needs to handle conflicting
 * accesses to those extra, shared, regs. The data structure
 * to manage those registers is stored in struct cpu_hw_events.
 */
548 struct extra_reg {
549 unsigned int event;
550 unsigned int msr;
551 u64 config_mask;
552 u64 valid_mask;
553 int idx; /* per_xxx->regs[] reg index */
554 bool extra_msr_access;
555 };
556
/*
 * Initializer for a struct extra_reg.  i is the EXTRA_REG_ suffix
 * (e.g. RSP_0); extra_msr_access is always set to true.
 */
#define EVENT_EXTRA_REG(e, ms, m, vm, i) {	\
	.event			= (e),		\
	.msr			= (ms),		\
	.config_mask		= (m),		\
	.valid_mask		= (vm),		\
	.idx			= EXTRA_REG_##i,	\
	.extra_msr_access	= true,		\
}

/* Extra reg keyed on the event code only. */
#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)	\
	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)

/* Extra reg keyed on event code + umask. */
#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx)	\
	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \
			ARCH_PERFMON_EVENTSEL_UMASK, vm, idx)

/* Load-latency extra reg: MSR_PEBS_LD_LAT_THRESHOLD, valid mask 0xffff. */
#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c)	\
	INTEL_UEVENT_EXTRA_REG(c,		\
			       MSR_PEBS_LD_LAT_THRESHOLD, \
			       0xffff,		\
			       LDLAT)

/* All-zero entry used as the terminator. */
#define EVENT_EXTRA_END		EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
580
581 union perf_capabilities {
582 struct {
583 u64 lbr_format:6;
584 u64 pebs_trap:1;
585 u64 pebs_arch_reg:1;
586 u64 pebs_format:4;
587 u64 smm_freeze:1;
588 /*
589 * PMU supports separate counter range for writing
590 * values > 32bit.
591 */
592 u64 full_width_write:1;
593 u64 pebs_baseline:1;
594 u64 perf_metrics:1;
595 u64 pebs_output_pt_available:1;
596 u64 anythread_deprecated:1;
597 };
598 u64 capabilities;
599 };
600
/* Node of the singly linked quirk list headed by x86_pmu.quirks. */
struct x86_pmu_quirk {
	struct x86_pmu_quirk	*next;
	void			(*func)(void);
};
605
606 union x86_pmu_config {
607 struct {
608 u64 event:8,
609 umask:8,
610 usr:1,
611 os:1,
612 edge:1,
613 pc:1,
614 interrupt:1,
615 __reserved1:1,
616 en:1,
617 inv:1,
618 cmask:8,
619 event2:4,
620 __reserved2:4,
621 go:1,
622 ho:1;
623 } bits;
624 u64 value;
625 };
626
627 #define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
628
/* Indexes into x86_pmu.lbr_exclusive[]. */
enum {
	x86_lbr_exclusive_lbr,
	x86_lbr_exclusive_bts,
	x86_lbr_exclusive_pt,
	x86_lbr_exclusive_max,	/* array size; keep last */
};
635
636 struct x86_hybrid_pmu {
637 struct pmu pmu;
638 const char *name;
639 u8 cpu_type;
640 cpumask_t supported_cpus;
641 union perf_capabilities intel_cap;
642 u64 intel_ctrl;
643 int max_pebs_events;
644 int num_counters;
645 int num_counters_fixed;
646 struct event_constraint unconstrained;
647
648 u64 hw_cache_event_ids
649 [PERF_COUNT_HW_CACHE_MAX]
650 [PERF_COUNT_HW_CACHE_OP_MAX]
651 [PERF_COUNT_HW_CACHE_RESULT_MAX];
652 u64 hw_cache_extra_regs
653 [PERF_COUNT_HW_CACHE_MAX]
654 [PERF_COUNT_HW_CACHE_OP_MAX]
655 [PERF_COUNT_HW_CACHE_RESULT_MAX];
656 struct event_constraint *event_constraints;
657 struct event_constraint *pebs_constraints;
658 struct extra_reg *extra_regs;
659
660 unsigned int late_ack :1,
661 mid_ack :1,
662 enabled_ack :1;
663 };
664
hybrid_pmu(struct pmu * pmu)665 static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu)
666 {
667 return container_of(pmu, struct x86_hybrid_pmu, pmu);
668 }
669
extern struct static_key_false perf_is_hybrid;
#define is_hybrid()		static_branch_unlikely(&perf_is_hybrid)

/*
 * Lvalue for a field: x86_pmu._field by default, or the same-named
 * field of hybrid_pmu(_pmu) when is_hybrid() and _pmu is non-NULL.
 */
#define hybrid(_pmu, _field)					\
(*({								\
	typeof(&x86_pmu._field) __field_ptr = &x86_pmu._field;	\
								\
	if (is_hybrid() && (_pmu))				\
		__field_ptr = &hybrid_pmu(_pmu)->_field;	\
								\
	__field_ptr;						\
}))

/*
 * Like hybrid(), but the default is the variable _var itself
 * rather than a member of x86_pmu.
 */
#define hybrid_var(_pmu, _var)					\
(*({								\
	typeof(&_var) __var_ptr = &_var;			\
								\
	if (is_hybrid() && (_pmu))				\
		__var_ptr = &hybrid_pmu(_pmu)->_var;		\
								\
	__var_ptr;						\
}))

/*
 * Value (not lvalue) variant, read into a bool; usable with
 * bitfield members, whose address cannot be taken.
 */
#define hybrid_bit(_pmu, _field)				\
({								\
	bool __bit_val = x86_pmu._field;			\
								\
	if (is_hybrid() && (_pmu))				\
		__bit_val = hybrid_pmu(_pmu)->_field;		\
								\
	__bit_val;						\
})
702
/* Hybrid PMU type bits; big_small is the OR of the two. */
enum hybrid_pmu_type {
	hybrid_big		= 0x40,
	hybrid_small		= 0x20,

	hybrid_big_small	= hybrid_big | hybrid_small,
};

#define X86_HYBRID_PMU_ATOM_IDX		0
#define X86_HYBRID_PMU_CORE_IDX		1

#define X86_HYBRID_NUM_PMUS		2
714
715 /*
716 * struct x86_pmu - generic x86 pmu
717 */
718 struct x86_pmu {
719 /*
720 * Generic x86 PMC bits
721 */
722 const char *name;
723 int version;
724 int (*handle_irq)(struct pt_regs *);
725 void (*disable_all)(void);
726 void (*enable_all)(int added);
727 void (*enable)(struct perf_event *);
728 void (*disable)(struct perf_event *);
729 void (*add)(struct perf_event *);
730 void (*del)(struct perf_event *);
731 void (*read)(struct perf_event *event);
732 int (*hw_config)(struct perf_event *event);
733 int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
734 unsigned eventsel;
735 unsigned perfctr;
736 int (*addr_offset)(int index, bool eventsel);
737 int (*rdpmc_index)(int index);
738 u64 (*event_map)(int);
739 int max_events;
740 int num_counters;
741 int num_counters_fixed;
742 int cntval_bits;
743 u64 cntval_mask;
744 union {
745 unsigned long events_maskl;
746 unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
747 };
748 int events_mask_len;
749 int apic;
750 u64 max_period;
751 struct event_constraint *
752 (*get_event_constraints)(struct cpu_hw_events *cpuc,
753 int idx,
754 struct perf_event *event);
755
756 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
757 struct perf_event *event);
758
759 void (*start_scheduling)(struct cpu_hw_events *cpuc);
760
761 void (*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
762
763 void (*stop_scheduling)(struct cpu_hw_events *cpuc);
764
765 struct event_constraint *event_constraints;
766 struct x86_pmu_quirk *quirks;
767 int perfctr_second_write;
768 u64 (*limit_period)(struct perf_event *event, u64 l);
769
770 /* PMI handler bits */
771 unsigned int late_ack :1,
772 mid_ack :1,
773 enabled_ack :1;
774 /*
775 * sysfs attrs
776 */
777 int attr_rdpmc_broken;
778 int attr_rdpmc;
779 struct attribute **format_attrs;
780
781 ssize_t (*events_sysfs_show)(char *page, u64 config);
782 const struct attribute_group **attr_update;
783
784 unsigned long attr_freeze_on_smi;
785
786 /*
787 * CPU Hotplug hooks
788 */
789 int (*cpu_prepare)(int cpu);
790 void (*cpu_starting)(int cpu);
791 void (*cpu_dying)(int cpu);
792 void (*cpu_dead)(int cpu);
793
794 void (*check_microcode)(void);
795 void (*sched_task)(struct perf_event_context *ctx,
796 bool sched_in);
797
798 /*
799 * Intel Arch Perfmon v2+
800 */
801 u64 intel_ctrl;
802 union perf_capabilities intel_cap;
803
804 /*
805 * Intel DebugStore bits
806 */
807 unsigned int bts :1,
808 bts_active :1,
809 pebs :1,
810 pebs_active :1,
811 pebs_broken :1,
812 pebs_prec_dist :1,
813 pebs_no_tlb :1,
814 pebs_no_isolation :1,
815 pebs_block :1;
816 int pebs_record_size;
817 int pebs_buffer_size;
818 int max_pebs_events;
819 void (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data);
820 struct event_constraint *pebs_constraints;
821 void (*pebs_aliases)(struct perf_event *event);
822 unsigned long large_pebs_flags;
823 u64 rtm_abort_event;
824
825 /*
826 * Intel LBR
827 */
828 unsigned int lbr_tos, lbr_from, lbr_to,
829 lbr_info, lbr_nr; /* LBR base regs and size */
830 union {
831 u64 lbr_sel_mask; /* LBR_SELECT valid bits */
832 u64 lbr_ctl_mask; /* LBR_CTL valid bits */
833 };
834 union {
835 const int *lbr_sel_map; /* lbr_select mappings */
836 int *lbr_ctl_map; /* LBR_CTL mappings */
837 };
838 bool lbr_double_abort; /* duplicated lbr aborts */
839 bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */
840
841 /*
842 * Intel Architectural LBR CPUID Enumeration
843 */
844 unsigned int lbr_depth_mask:8;
845 unsigned int lbr_deep_c_reset:1;
846 unsigned int lbr_lip:1;
847 unsigned int lbr_cpl:1;
848 unsigned int lbr_filter:1;
849 unsigned int lbr_call_stack:1;
850 unsigned int lbr_mispred:1;
851 unsigned int lbr_timed_lbr:1;
852 unsigned int lbr_br_type:1;
853
854 void (*lbr_reset)(void);
855 void (*lbr_read)(struct cpu_hw_events *cpuc);
856 void (*lbr_save)(void *ctx);
857 void (*lbr_restore)(void *ctx);
858
859 /*
860 * Intel PT/LBR/BTS are exclusive
861 */
862 atomic_t lbr_exclusive[x86_lbr_exclusive_max];
863
864 /*
865 * Intel perf metrics
866 */
867 int num_topdown_events;
868 u64 (*update_topdown_event)(struct perf_event *event);
869 int (*set_topdown_event_period)(struct perf_event *event);
870
871 /*
872 * perf task context (i.e. struct perf_event_context::task_ctx_data)
873 * switch helper to bridge calls from perf/core to perf/x86.
874 * See struct pmu::swap_task_ctx() usage for examples;
875 */
876 void (*swap_task_ctx)(struct perf_event_context *prev,
877 struct perf_event_context *next);
878
879 /*
880 * AMD bits
881 */
882 unsigned int amd_nb_constraints : 1;
883 u64 perf_ctr_pair_en;
884
885 /*
886 * Extra registers for events
887 */
888 struct extra_reg *extra_regs;
889 unsigned int flags;
890
891 /*
892 * Intel host/guest support (KVM)
893 */
894 struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
895
896 /*
897 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
898 */
899 int (*check_period) (struct perf_event *event, u64 period);
900
901 int (*aux_output_match) (struct perf_event *event);
902
903 int (*filter_match)(struct perf_event *event);
904 /*
905 * Hybrid support
906 *
907 * Most PMU capabilities are the same among different hybrid PMUs.
908 * The global x86_pmu saves the architecture capabilities, which
909 * are available for all PMUs. The hybrid_pmu only includes the
910 * unique capabilities.
911 */
912 int num_hybrid_pmus;
913 struct x86_hybrid_pmu *hybrid_pmu;
914 u8 (*get_hybrid_cpu_type) (void);
915 };
916
917 struct x86_perf_task_context_opt {
918 int lbr_callstack_users;
919 int lbr_stack_state;
920 int log_id;
921 };
922
923 struct x86_perf_task_context {
924 u64 lbr_sel;
925 int tos;
926 int valid_lbrs;
927 struct x86_perf_task_context_opt opt;
928 struct lbr_entry lbr[MAX_LBR_ENTRIES];
929 };
930
931 struct x86_perf_task_context_arch_lbr {
932 struct x86_perf_task_context_opt opt;
933 struct lbr_entry entries[];
934 };
935
936 /*
937 * Add padding to guarantee the 64-byte alignment of the state buffer.
938 *
939 * The structure is dynamically allocated. The size of the LBR state may vary
940 * based on the number of LBR registers.
941 *
942 * Do not put anything after the LBR state.
943 */
944 struct x86_perf_task_context_arch_lbr_xsave {
945 struct x86_perf_task_context_opt opt;
946
947 union {
948 struct xregs_state xsave;
949 struct {
950 struct fxregs_state i387;
951 struct xstate_header header;
952 struct arch_lbr_state lbr;
953 } __attribute__ ((packed, aligned (XSAVE_ALIGNMENT)));
954 };
955 };
956
/*
 * Define a static __initdata quirk node calling func_ and push it
 * onto the head of the x86_pmu.quirks list.
 */
#define x86_add_quirk(func_)						\
do {									\
	static struct x86_pmu_quirk __quirk __initdata = {		\
		.func = func_,						\
	};								\
									\
	__quirk.next = x86_pmu.quirks;					\
	x86_pmu.quirks = &__quirk;					\
} while (0)
965
/*
 * Bits for x86_pmu.flags
 */
#define PMU_FL_NO_HT_SHARING	0x1	/* no hyper-threading resource sharing */
#define PMU_FL_HAS_RSP_1	0x2	/* has 2 equivalent offcore_rsp regs */
#define PMU_FL_EXCL_CNTRS	0x4	/* has exclusive counter requirements */
#define PMU_FL_EXCL_ENABLED	0x8	/* exclusive counter active */
#define PMU_FL_PEBS_ALL		0x10	/* all events are valid PEBS events */
#define PMU_FL_TFA		0x20	/* deal with TSX force abort */
#define PMU_FL_PAIR		0x40	/* merge counters for large incr. events */
#define PMU_FL_INSTR_LATENCY	0x80	/* supports Instruction Latency in PEBS Memory Info Record */
#define PMU_FL_MEM_LOADS_AUX	0x100	/* requires an auxiliary event for the complete memory info */
978
/* Name of the attribute variable generated for event _id. */
#define EVENT_VAR(_id)  event_attr_##_id
/*
 * Address of the innermost .attr.attr member of that variable.  The
 * expansion is fully parenthesized (as FORMAT_HYBRID_PTR() is) so that
 * postfix operators applied to the result bind to the pointer.
 */
#define EVENT_PTR(_id) (&event_attr_##_id.attr.attr)
981
/*
 * Define a read-only (0444) events attribute for PERF_COUNT_HW_<_id>.
 * The expansion already ends with a semicolon.
 */
#define EVENT_ATTR(_name, _id)						\
static struct perf_pmu_events_attr EVENT_VAR(_id) = {			\
	.attr		= __ATTR(_name, 0444, events_sysfs_show, NULL),	\
	.id		= PERF_COUNT_HW_##_id,				\
	.event_str	= NULL,						\
};

/*
 * Define a read-only events attribute backed by the string str (id 0).
 * The expansion already ends with a semicolon.
 */
#define EVENT_ATTR_STR(_name, v, str)					\
static struct perf_pmu_events_attr event_attr_##v = {			\
	.attr		= __ATTR(_name, 0444, events_sysfs_show, NULL),	\
	.id		= 0,						\
	.event_str	= str,						\
};

/* String attribute with separate strings for the noht and ht cases. */
#define EVENT_ATTR_STR_HT(_name, v, noht, ht)				\
static struct perf_pmu_events_ht_attr event_attr_##v = {		\
	.attr		= __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\
	.id		= 0,						\
	.event_str_noht	= noht,						\
	.event_str_ht	= ht,						\
}

/* String attribute tagged with a hybrid PMU type. */
#define EVENT_ATTR_STR_HYBRID(_name, v, str, _pmu)			\
static struct perf_pmu_events_hybrid_attr event_attr_##v = {		\
	.attr		= __ATTR(_name, 0444, events_hybrid_sysfs_show, NULL),\
	.id		= 0,						\
	.event_str	= str,						\
	.pmu_type	= _pmu,						\
}

/* Address of the innermost .attr.attr of a hybrid format attribute. */
#define FORMAT_HYBRID_PTR(_id) (&format_attr_hybrid_##_id.attr.attr)

/* Define a read-only format attribute tagged with a hybrid PMU type. */
#define FORMAT_ATTR_HYBRID(_name, _pmu)					\
static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\
	.attr		= __ATTR_RO(_name),				\
	.pmu_type	= _pmu,						\
}
1019
1020 struct pmu *x86_get_pmu(unsigned int cpu);
1021 extern struct x86_pmu x86_pmu __read_mostly;
1022
task_context_opt(void * ctx)1023 static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
1024 {
1025 if (static_cpu_has(X86_FEATURE_ARCH_LBR))
1026 return &((struct x86_perf_task_context_arch_lbr *)ctx)->opt;
1027
1028 return &((struct x86_perf_task_context *)ctx)->opt;
1029 }
1030
x86_pmu_has_lbr_callstack(void)1031 static inline bool x86_pmu_has_lbr_callstack(void)
1032 {
1033 return x86_pmu.lbr_sel_map &&
1034 x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0;
1035 }
1036
1037 DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
1038
1039 int x86_perf_event_set_period(struct perf_event *event);
1040
1041 /*
1042 * Generalized hw caching related hw_event table, filled
1043 * in on a per model basis. A value of 0 means
1044 * 'not supported', -1 means 'hw_event makes no sense on
1045 * this CPU', any other value means the raw hw_event
1046 * ID.
1047 */
1048
1049 #define C(x) PERF_COUNT_HW_CACHE_##x
1050
1051 extern u64 __read_mostly hw_cache_event_ids
1052 [PERF_COUNT_HW_CACHE_MAX]
1053 [PERF_COUNT_HW_CACHE_OP_MAX]
1054 [PERF_COUNT_HW_CACHE_RESULT_MAX];
1055 extern u64 __read_mostly hw_cache_extra_regs
1056 [PERF_COUNT_HW_CACHE_MAX]
1057 [PERF_COUNT_HW_CACHE_OP_MAX]
1058 [PERF_COUNT_HW_CACHE_RESULT_MAX];
1059
1060 u64 x86_perf_event_update(struct perf_event *event);
1061
x86_pmu_config_addr(int index)1062 static inline unsigned int x86_pmu_config_addr(int index)
1063 {
1064 return x86_pmu.eventsel + (x86_pmu.addr_offset ?
1065 x86_pmu.addr_offset(index, true) : index);
1066 }
1067
x86_pmu_event_addr(int index)1068 static inline unsigned int x86_pmu_event_addr(int index)
1069 {
1070 return x86_pmu.perfctr + (x86_pmu.addr_offset ?
1071 x86_pmu.addr_offset(index, false) : index);
1072 }
1073
x86_pmu_rdpmc_index(int index)1074 static inline int x86_pmu_rdpmc_index(int index)
1075 {
1076 return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
1077 }
1078
/* Prototypes for functions implemented outside this header. */
bool check_hw_exists(struct pmu *pmu, int num_counters,
		     int num_counters_fixed);

/* Paired add/del and reserve/release entry points. */
int x86_add_exclusive(unsigned int what);
void x86_del_exclusive(unsigned int what);
int x86_reserve_hardware(void);
void x86_release_hardware(void);

int x86_pmu_max_precise(void);
void hw_perf_lbr_event_destroy(struct perf_event *event);

/* Per-event setup. */
int x86_setup_perfctr(struct perf_event *event);
int x86_pmu_hw_config(struct perf_event *event);

void x86_pmu_disable_all(void);
1099
is_counter_pair(struct hw_perf_event * hwc)1100 static inline bool is_counter_pair(struct hw_perf_event *hwc)
1101 {
1102 return hwc->flags & PERF_X86_EVENT_PAIR;
1103 }
1104
/*
 * Program the MSRs that enable the event described by @hwc.
 *
 * @hwc:         hardware event state (config, config_base, idx, extra_reg)
 * @enable_mask: bits OR-ed into hwc->config for the final write
 *
 * Bits set in this CPU's cpu_hw_events.perf_ctr_virt_mask are always
 * cleared from the value written to hwc->config_base.
 */
static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
					  u64 enable_mask)
{
	u64 virt_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
	u64 config;

	/* The extra register, if the event has one, is written first. */
	if (hwc->extra_reg.reg)
		wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);

	/*
	 * A large increment event occupies a counter pair: enable the
	 * Merge event on the next counter before enabling this one.
	 */
	if (is_counter_pair(hwc))
		wrmsrl(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en);

	config = (hwc->config | enable_mask) & ~virt_mask;
	wrmsrl(hwc->config_base, config);
}
1122
/* Prototypes for functions implemented outside this header. */
void x86_pmu_enable_all(int added);

/* Both of these fill in the caller-provided @assign array. */
int perf_assign_events(struct event_constraint **constraints, int n,
		       int wmin, int wmax, int gpmax, int *assign);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);

void x86_pmu_stop(struct perf_event *event, int flags);
1130
x86_pmu_disable_event(struct perf_event * event)1131 static inline void x86_pmu_disable_event(struct perf_event *event)
1132 {
1133 u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
1134 struct hw_perf_event *hwc = &event->hw;
1135
1136 wrmsrl(hwc->config_base, hwc->config & ~disable_mask);
1137
1138 if (is_counter_pair(hwc))
1139 wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0);
1140 }
1141
1142 void x86_pmu_enable_event(struct perf_event *event);
1143
1144 int x86_pmu_handle_irq(struct pt_regs *regs);
1145
1146 void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
1147 u64 intel_ctrl);
1148
1149 void x86_pmu_update_cpu_context(struct pmu *pmu, int cpu);
1150
1151 extern struct event_constraint emptyconstraint;
1152
1153 extern struct event_constraint unconstrained;
1154
/*
 * Classify @ip as a kernel address.
 *
 * On CONFIG_X86_32 an address strictly above PAGE_OFFSET counts as
 * kernel; otherwise an address counts as kernel when it is negative
 * once interpreted as a signed long.
 */
static inline bool kernel_ip(unsigned long ip)
{
	bool in_kernel;

#ifndef CONFIG_X86_32
	in_kernel = (long)ip < 0;
#else
	in_kernel = ip > PAGE_OFFSET;
#endif

	return in_kernel;
}
1163
1164 /*
1165 * Not all PMUs provide the right context information to place the reported IP
1166 * into full context. Specifically segment registers are typically not
1167 * supplied.
1168 *
1169 * Assuming the address is a linear address (it is for IBS), we fake the CS and
1170 * vm86 mode using the known zero-based code segment and 'fix up' the registers
1171 * to reflect this.
1172 *
1173 * Intel PEBS/LBR appear to typically provide the effective address, nothing
1174 * much we can do about that but pray and treat it like a linear address.
1175 */
set_linear_ip(struct pt_regs * regs,unsigned long ip)1176 static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
1177 {
1178 regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;
1179 if (regs->flags & X86_VM_MASK)
1180 regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);
1181 regs->ip = ip;
1182 }
1183
/*
 * x86 control flow change classification.
 * x86 control flow changes include branches, interrupts, traps, faults.
 *
 * Every value other than X86_BR_NONE is a distinct single bit.
 */
enum {
	X86_BR_NONE		= 0x00000, /* unknown */

	X86_BR_USER		= 0x00001, /* branch target is user */
	X86_BR_KERNEL		= 0x00002, /* branch target is kernel */

	X86_BR_CALL		= 0x00004, /* call */
	X86_BR_RET		= 0x00008, /* return */
	X86_BR_SYSCALL		= 0x00010, /* syscall */
	X86_BR_SYSRET		= 0x00020, /* syscall return */
	X86_BR_INT		= 0x00040, /* sw interrupt */
	X86_BR_IRET		= 0x00080, /* return from interrupt */
	X86_BR_JCC		= 0x00100, /* conditional */
	X86_BR_JMP		= 0x00200, /* jump */
	X86_BR_IRQ		= 0x00400, /* hw interrupt or trap or fault */
	X86_BR_IND_CALL		= 0x00800, /* indirect calls */
	X86_BR_ABORT		= 0x01000, /* transaction abort */
	X86_BR_IN_TX		= 0x02000, /* in transaction */
	X86_BR_NO_TX		= 0x04000, /* not in transaction */
	X86_BR_ZERO_CALL	= 0x08000, /* zero length call */
	X86_BR_CALL_STACK	= 0x10000, /* call stack */
	X86_BR_IND_JMP		= 0x20000, /* indirect jump */

	X86_BR_TYPE_SAVE	= 0x40000, /* indicate to save branch type */
};
1214
/* Both branch-target privilege bits. */
#define X86_BR_PLM	(X86_BR_USER | X86_BR_KERNEL)

/* Both transaction-state bits. */
#define X86_BR_ANYTX	(X86_BR_IN_TX | X86_BR_NO_TX)

/*
 * Every control flow change type.  The privilege, transaction-state,
 * X86_BR_CALL_STACK and X86_BR_TYPE_SAVE bits are not part of it.
 */
#define X86_BR_ANY							\
	(X86_BR_CALL | X86_BR_IND_CALL | X86_BR_ZERO_CALL |		\
	 X86_BR_RET |							\
	 X86_BR_SYSCALL | X86_BR_SYSRET |				\
	 X86_BR_INT | X86_BR_IRET | X86_BR_IRQ |			\
	 X86_BR_JCC | X86_BR_JMP | X86_BR_IND_JMP |			\
	 X86_BR_ABORT)

/* Every type combined with both privilege bits. */
#define X86_BR_ALL	(X86_BR_ANY | X86_BR_PLM)

/* The types grouped as calls; syscalls and interrupts are included. */
#define X86_BR_ANY_CALL							\
	(X86_BR_CALL | X86_BR_IND_CALL | X86_BR_ZERO_CALL |		\
	 X86_BR_SYSCALL |						\
	 X86_BR_IRQ | X86_BR_INT)
1242
1243 int common_branch_type(int type);
1244 int branch_type(unsigned long from, unsigned long to, int abort);
1245
1246 ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
1247 ssize_t intel_event_sysfs_show(char *page, u64 config);
1248
1249 ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
1250 char *page);
1251 ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr,
1252 char *page);
1253 ssize_t events_hybrid_sysfs_show(struct device *dev,
1254 struct device_attribute *attr,
1255 char *page);
1256
fixed_counter_disabled(int i,struct pmu * pmu)1257 static inline bool fixed_counter_disabled(int i, struct pmu *pmu)
1258 {
1259 u64 intel_ctrl = hybrid(pmu, intel_ctrl);
1260
1261 return !(intel_ctrl >> (i + INTEL_PMC_IDX_FIXED));
1262 }
1263
#ifndef CONFIG_CPU_SUP_AMD

/* Stub used when CONFIG_CPU_SUP_AMD is not set: always returns 0. */
static inline int amd_pmu_init(void)
{
	return 0;
}

#else /* CONFIG_CPU_SUP_AMD */

/* The real implementation lives outside this header. */
int amd_pmu_init(void);

#endif /* CONFIG_CPU_SUP_AMD */
1276
is_pebs_pt(struct perf_event * event)1277 static inline int is_pebs_pt(struct perf_event *event)
1278 {
1279 return !!(event->hw.flags & PERF_X86_EVENT_PEBS_VIA_PT);
1280 }
1281
1282 #ifdef CONFIG_CPU_SUP_INTEL
1283
/*
 * Decide whether @event, sampled with @period, matches the BTS
 * conditions checked here.
 *
 * All three must hold:
 *  - the event is not in frequency mode (attr.freq is clear),
 *  - its config, masked with INTEL_ARCH_EVENT_MASK, equals what
 *    x86_pmu.event_map() returns for PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
 *  - @period is exactly 1.
 */
static inline bool intel_pmu_has_bts_period(struct perf_event *event, u64 period)
{
	unsigned int event_code, branch_code;

	if (event->attr.freq)
		return false;

	event_code = event->hw.config & INTEL_ARCH_EVENT_MASK;
	branch_code = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);

	if (event_code != branch_code)
		return false;

	return period == 1;
}
1297
intel_pmu_has_bts(struct perf_event * event)1298 static inline bool intel_pmu_has_bts(struct perf_event *event)
1299 {
1300 struct hw_perf_event *hwc = &event->hw;
1301
1302 return intel_pmu_has_bts_period(event, hwc->sample_period);
1303 }
1304
1305 int intel_pmu_save_and_restart(struct perf_event *event);
1306
1307 struct event_constraint *
1308 x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
1309 struct perf_event *event);
1310
1311 extern int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu);
1312 extern void intel_cpuc_finish(struct cpu_hw_events *cpuc);
1313
1314 int intel_pmu_init(void);
1315
1316 void init_debug_store_on_cpu(int cpu);
1317
1318 void fini_debug_store_on_cpu(int cpu);
1319
1320 void release_ds_buffers(void);
1321
1322 void reserve_ds_buffers(void);
1323
1324 void release_lbr_buffers(void);
1325
1326 void reserve_lbr_buffers(void);
1327
1328 extern struct event_constraint bts_constraint;
1329 extern struct event_constraint vlbr_constraint;
1330
1331 void intel_pmu_enable_bts(u64 config);
1332
1333 void intel_pmu_disable_bts(void);
1334
1335 int intel_pmu_drain_bts_buffer(void);
1336
1337 extern struct event_constraint intel_core2_pebs_event_constraints[];
1338
1339 extern struct event_constraint intel_atom_pebs_event_constraints[];
1340
1341 extern struct event_constraint intel_slm_pebs_event_constraints[];
1342
1343 extern struct event_constraint intel_glm_pebs_event_constraints[];
1344
1345 extern struct event_constraint intel_glp_pebs_event_constraints[];
1346
1347 extern struct event_constraint intel_grt_pebs_event_constraints[];
1348
1349 extern struct event_constraint intel_nehalem_pebs_event_constraints[];
1350
1351 extern struct event_constraint intel_westmere_pebs_event_constraints[];
1352
1353 extern struct event_constraint intel_snb_pebs_event_constraints[];
1354
1355 extern struct event_constraint intel_ivb_pebs_event_constraints[];
1356
1357 extern struct event_constraint intel_hsw_pebs_event_constraints[];
1358
1359 extern struct event_constraint intel_bdw_pebs_event_constraints[];
1360
1361 extern struct event_constraint intel_skl_pebs_event_constraints[];
1362
1363 extern struct event_constraint intel_icl_pebs_event_constraints[];
1364
1365 extern struct event_constraint intel_spr_pebs_event_constraints[];
1366
/* PEBS prototypes; all are implemented outside this header. */
struct event_constraint *intel_pebs_constraints(struct perf_event *event);

/* Per-event operations. */
void intel_pmu_pebs_add(struct perf_event *event);
void intel_pmu_pebs_del(struct perf_event *event);
void intel_pmu_pebs_enable(struct perf_event *event);
void intel_pmu_pebs_disable(struct perf_event *event);

/* Operations that take no event argument. */
void intel_pmu_pebs_enable_all(void);
void intel_pmu_pebs_disable_all(void);

void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
void intel_pmu_auto_reload_read(struct perf_event *event);
void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);

void intel_ds_init(void);
1388
1389 void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
1390 struct perf_event_context *next);
1391
1392 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
1393
1394 u64 lbr_from_signext_quirk_wr(u64 val);
1395
1396 void intel_pmu_lbr_reset(void);
1397
1398 void intel_pmu_lbr_reset_32(void);
1399
1400 void intel_pmu_lbr_reset_64(void);
1401
1402 void intel_pmu_lbr_add(struct perf_event *event);
1403
1404 void intel_pmu_lbr_del(struct perf_event *event);
1405
1406 void intel_pmu_lbr_enable_all(bool pmi);
1407
1408 void intel_pmu_lbr_disable_all(void);
1409
1410 void intel_pmu_lbr_read(void);
1411
1412 void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc);
1413
1414 void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc);
1415
1416 void intel_pmu_lbr_save(void *ctx);
1417
1418 void intel_pmu_lbr_restore(void *ctx);
1419
1420 void intel_pmu_lbr_init_core(void);
1421
1422 void intel_pmu_lbr_init_nhm(void);
1423
1424 void intel_pmu_lbr_init_atom(void);
1425
1426 void intel_pmu_lbr_init_slm(void);
1427
1428 void intel_pmu_lbr_init_snb(void);
1429
1430 void intel_pmu_lbr_init_hsw(void);
1431
1432 void intel_pmu_lbr_init_skl(void);
1433
1434 void intel_pmu_lbr_init_knl(void);
1435
1436 void intel_pmu_arch_lbr_init(void);
1437
1438 void intel_pmu_pebs_data_source_nhm(void);
1439
1440 void intel_pmu_pebs_data_source_skl(bool pmem);
1441
1442 int intel_pmu_setup_lbr_filter(struct perf_event *event);
1443
/* Prototypes for functions implemented outside this header. */
void intel_pt_interrupt(void);

int intel_bts_interrupt(void);
void intel_bts_enable_local(void);
void intel_bts_disable_local(void);

/* Further PMU init entry points. */
int p4_pmu_init(void);
int p6_pmu_init(void);
int knc_pmu_init(void);
1457
is_ht_workaround_enabled(void)1458 static inline int is_ht_workaround_enabled(void)
1459 {
1460 return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED);
1461 }
1462
1463 #else /* CONFIG_CPU_SUP_INTEL */
1464
/*
 * Stand-ins for the branch where CONFIG_CPU_SUP_INTEL is not set.
 * The void ones do nothing; the int ones always return 0.
 */
static inline void reserve_ds_buffers(void) { }
static inline void release_ds_buffers(void) { }
static inline void release_lbr_buffers(void) { }
static inline void reserve_lbr_buffers(void) { }
static inline void intel_cpuc_finish(struct cpu_hw_events *cpuc) { }

static inline int intel_pmu_init(void)
{
	return 0;
}

static inline int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
{
	return 0;
}

static inline int is_ht_workaround_enabled(void)
{
	return 0;
}
1499 #endif /* CONFIG_CPU_SUP_INTEL */
1500
#if defined(CONFIG_CPU_SUP_CENTAUR) || defined(CONFIG_CPU_SUP_ZHAOXIN)
/* The real implementation lives outside this header. */
int zhaoxin_pmu_init(void);
#else
/* Stub used when neither option is set: always returns 0. */
static inline int zhaoxin_pmu_init(void)
{
	return 0;
}
#endif /* CONFIG_CPU_SUP_CENTAUR || CONFIG_CPU_SUP_ZHAOXIN */
1509