/*
 * @file op_model_amd.c
 * athlon / K7 / K8 / Family 10h model-specific MSR operations
 *
 * @remark Copyright 2002-2009 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 * @author Graydon Hoare
 * @author Robert Richter <robert.richter@amd.com>
 * @author Barry Kasindorf <barry.kasindorf@amd.com>
 * @author Jason Yeh <jason.yeh@amd.com>
 * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
 */

#include <linux/oprofile.h>
#include <linux/device.h>
#include <linux/pci.h>
#include <linux/percpu.h>

#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/apic.h>
#include <asm/processor.h>

#include "op_x86_model.h"
#include "op_counter.h"

#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
#define NUM_VIRT_COUNTERS	32
#else
#define NUM_VIRT_COUNTERS	0
#endif

#define OP_EVENT_MASK			0x0FFF
#define OP_CTR_OVERFLOW			(1ULL<<31)

#define MSR_AMD_EVENTSEL_RESERVED	((0xFFFFFCF0ULL<<32)|(1ULL<<21))

static int num_counters;
static unsigned long reset_value[OP_MAX_COUNTER];

#define IBS_FETCH_SIZE			6
#define IBS_OP_SIZE			12

static u32 ibs_caps;

struct ibs_config {
	unsigned long op_enabled;	/* IBS execution (op) sampling enabled */
	unsigned long fetch_enabled;	/* IBS fetch sampling enabled */
	unsigned long max_cnt_fetch;	/* fetch sampling max count (period) */
	unsigned long max_cnt_op;	/* op sampling max count (period) */
	unsigned long rand_en;		/* randomize the fetch counter */
	unsigned long dispatched_ops;	/* count dispatched ops instead of cycles */
	unsigned long branch_target;	/* collect branch target addresses */
};

struct ibs_state {
	u64		ibs_op_ctl;	/* IbsOpCtl value used to (re)arm op sampling */
	int		branch_target;	/* append IbsBrTarget to each op sample */
	unsigned long	sample_size;	/* size of one op sample */
};

static struct ibs_config ibs_config;
static struct ibs_state ibs_state;

/*
 * IBS randomization macros
 */
#define IBS_RANDOM_BITS			12
#define IBS_RANDOM_MASK			((1ULL << IBS_RANDOM_BITS) - 1)
#define IBS_RANDOM_MAXCNT_OFFSET	(1ULL << (IBS_RANDOM_BITS - 5))

/*
 * 16-bit Linear Feedback Shift Register (LFSR)
 *
 * Feedback polynomial = x^16 + x^14 + x^13 + x^11 + 1
 */
static unsigned int lfsr_random(void)
{
	static unsigned int lfsr_value = 0xF00D;
	unsigned int bit;

	/* Compute next bit to shift in */
	bit = ((lfsr_value >> 0) ^
	       (lfsr_value >> 2) ^
	       (lfsr_value >> 3) ^
	       (lfsr_value >> 5)) & 0x0001;

	/* Advance to next register value */
	lfsr_value = (lfsr_value >> 1) | (bit << 15);

	return lfsr_value;
}
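
/*
 * Illustrative walk-through (not part of the driver): starting from the
 * 0xF00D seed, bits 0, 2, 3 and 5 are 1, 1, 1 and 0, so the feedback
 * bit is 1 ^ 1 ^ 1 ^ 0 = 1 and the next register value is
 * (0xF00D >> 1) | (1 << 15) = 0xF806. The taps match the polynomial
 * above: exponents 16, 14, 13 and 11 correspond to bit offsets
 * 16-16=0, 16-14=2, 16-13=3 and 16-11=5 of the right-shifted value.
 */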

/*
 * IBS software randomization
 *
 * The IBS periodic op counter is randomized in software. The lower 12
 * bits of the 20-bit counter are randomized. IbsOpCurCnt is
 * initialized with a 12-bit random value.
 */
static inline u64 op_amd_randomize_ibs_op(u64 val)
{
	unsigned int random = lfsr_random();

	if (!(ibs_caps & IBS_CAPS_RDWROPCNT))
		/*
		 * Work around hardware that cannot write to IbsOpCurCnt.
		 *
		 * Randomize the lower 8 bits of the 16-bit
		 * IbsOpMaxCnt [15:0] value in the range of -128 to
		 * +127 by adding/subtracting an offset to/from the
		 * maximum count (IbsOpMaxCnt).
		 *
		 * To avoid over- or underflows and to protect the
		 * upper bits starting at bit 16, the initial value
		 * for IbsOpMaxCnt must fit in the range from 0x0081
		 * to 0xff80.
		 */
		val += (s8)(random >> 4);
	else
		val |= (u64)(random & IBS_RANDOM_MASK) << 32;

	return val;
}
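
/*
 * Worked example (illustrative): if the LFSR returns 0x0ABC and the
 * CPU supports IbsOpCurCnt writes, bits [11:0] of the random value
 * (0xABC) land at bits [43:32] of the control value, i.e. in the low
 * bits of the IbsOpCurCnt field. Without that capability,
 * (s8)(0x0ABC >> 4) = (s8)0xAB = -85 is added to the max count
 * instead, jittering the sampling period by up to +/-128 units.
 */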

static inline void
op_amd_handle_ibs(struct pt_regs * const regs,
		  struct op_msrs const * const msrs)
{
	u64 val, ctl;
	struct op_entry entry;

	if (!ibs_caps)
		return;

	if (ibs_config.fetch_enabled) {
		rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
		if (ctl & IBS_FETCH_VAL) {
			rdmsrl(MSR_AMD64_IBSFETCHLINAD, val);
			oprofile_write_reserve(&entry, regs, val,
					       IBS_FETCH_CODE, IBS_FETCH_SIZE);
			oprofile_add_data64(&entry, val);
			oprofile_add_data64(&entry, ctl);
			rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val);
			oprofile_add_data64(&entry, val);
			oprofile_write_commit(&entry);

			/* reenable the IRQ */
			ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT);
			ctl |= IBS_FETCH_ENABLE;
			wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
		}
	}

	if (ibs_config.op_enabled) {
		rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
		if (ctl & IBS_OP_VAL) {
			rdmsrl(MSR_AMD64_IBSOPRIP, val);
			oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE,
					       ibs_state.sample_size);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSOPDATA, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSOPDATA2, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSOPDATA3, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSDCLINAD, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
			oprofile_add_data64(&entry, val);
			if (ibs_state.branch_target) {
				rdmsrl(MSR_AMD64_IBSBRTARGET, val);
				oprofile_add_data(&entry, (unsigned long)val);
			}
			oprofile_write_commit(&entry);

			/* reenable the IRQ */
			ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
			wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
		}
	}
}
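
/*
 * Note: this runs from the NMI handler via op_amd_check_ctrs(), so it
 * relies on the oprofile_write_reserve()/oprofile_write_commit()
 * per-cpu buffer interface, which is designed to be usable from NMI
 * context.
 */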

static inline void op_amd_start_ibs(void)
{
	u64 val;

	if (!ibs_caps)
		return;

	memset(&ibs_state, 0, sizeof(ibs_state));

	/*
	 * Note: since the max count settings may be out of range, we
	 * write back the actual values used so that userland can read
	 * them.
	 */

	if (ibs_config.fetch_enabled) {
		val = ibs_config.max_cnt_fetch >> 4;
		val = min(val, IBS_FETCH_MAX_CNT);
		ibs_config.max_cnt_fetch = val << 4;
		val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
		val |= IBS_FETCH_ENABLE;
		wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
	}

	if (ibs_config.op_enabled) {
		val = ibs_config.max_cnt_op >> 4;
		if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
			/*
			 * IbsOpCurCnt not supported.  See
			 * op_amd_randomize_ibs_op() for details.
			 */
			val = clamp(val, 0x0081ULL, 0xFF80ULL);
			ibs_config.max_cnt_op = val << 4;
		} else {
			/*
			 * The start value is randomized with a
			 * positive offset; compensate for it with
			 * half of the randomized range. Also avoid
			 * underflows.
			 */
			val += IBS_RANDOM_MAXCNT_OFFSET;
			if (ibs_caps & IBS_CAPS_OPCNTEXT)
				val = min(val, IBS_OP_MAX_CNT_EXT);
			else
				val = min(val, IBS_OP_MAX_CNT);
			ibs_config.max_cnt_op =
				(val - IBS_RANDOM_MAXCNT_OFFSET) << 4;
		}
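		/*
		 * With the extended-count capability the upper count
		 * bits (bits [22:16] of the value) live at
		 * IbsOpCtl[26:20], so shift them up by 4 while the
		 * low 16 bits stay in place.
		 */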
		val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
		val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
		val |= IBS_OP_ENABLE;
		ibs_state.ibs_op_ctl = val;
		ibs_state.sample_size = IBS_OP_SIZE;
		if (ibs_config.branch_target) {
			ibs_state.branch_target = 1;
			ibs_state.sample_size++;
		}
		val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
		wrmsrl(MSR_AMD64_IBSOPCTL, val);
	}
}

static void op_amd_stop_ibs(void)
{
	if (!ibs_caps)
		return;

	if (ibs_config.fetch_enabled)
		/* clear max count and enable */
		wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);

	if (ibs_config.op_enabled)
		/* clear max count and enable */
		wrmsrl(MSR_AMD64_IBSOPCTL, 0);
}

#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX

static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
			       struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	/* enable active counters */
	for (i = 0; i < num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (!reset_value[virt])
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		val &= model->reserved;
		val |= op_x86_get_ctrl(model, &counter_config[virt]);
		wrmsrl(msrs->controls[i].addr, val);
	}
}

#endif

/* functions for op_amd_spec */

static void op_amd_shutdown(struct op_msrs const * const msrs)
{
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (!msrs->counters[i].addr)
			continue;
		release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
		release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
	}
}

static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
{
	int i;

	for (i = 0; i < num_counters; i++) {
		if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
			goto fail;
		if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {
			release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
			goto fail;
		}
		/* both registers must be reserved */
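		/*
		 * Family 15h interleaves the core PMU MSRs as CTL/CTR
		 * pairs, so adjacent counters sit two MSRs apart;
		 * hence the i << 1 stride below.
		 */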
		if (num_counters == AMD64_NUM_COUNTERS_CORE) {
			msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
			msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
		} else {
			msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
			msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
		}
		continue;
	fail:
		if (!counter_config[i].enabled)
			continue;
		op_x86_warn_reserved(i);
		op_amd_shutdown(msrs);
		return -EBUSY;
	}

	return 0;
}

static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
			      struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	/* setup reset_value */
	for (i = 0; i < OP_MAX_COUNTER; ++i) {
		if (counter_config[i].enabled
		    && msrs->counters[op_x86_virt_to_phys(i)].addr)
			reset_value[i] = counter_config[i].count;
		else
			reset_value[i] = 0;
	}

	/* clear all counters */
	for (i = 0; i < num_counters; ++i) {
		if (!msrs->controls[i].addr)
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
			op_x86_warn_in_use(i);
		val &= model->reserved;
		wrmsrl(msrs->controls[i].addr, val);
		/*
		 * avoid a false detection of ctr overflows in NMI
		 * handler
		 */
		wrmsrl(msrs->counters[i].addr, -1LL);
	}

	/* enable active counters */
	for (i = 0; i < num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (!reset_value[virt])
			continue;

		/* setup counter registers */
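		/*
		 * The counter counts upward and raises an NMI when it
		 * overflows past zero, so arming it with the negated
		 * reset value yields one interrupt per 'count' events.
		 */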
		wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);

		/* setup control registers */
		rdmsrl(msrs->controls[i].addr, val);
		val &= model->reserved;
		val |= op_x86_get_ctrl(model, &counter_config[virt]);
		wrmsrl(msrs->controls[i].addr, val);
	}
}

static int op_amd_check_ctrs(struct pt_regs * const regs,
			     struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	for (i = 0; i < num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (!reset_value[virt])
			continue;
		rdmsrl(msrs->counters[i].addr, val);
		/* bit is clear if overflowed: */
		if (val & OP_CTR_OVERFLOW)
			continue;
		oprofile_add_sample(regs, virt);
		wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);
	}

	op_amd_handle_ibs(regs, msrs);

	/* See op_model_ppro.c */
	return 1;
}

static void op_amd_start(struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[op_x86_phys_to_virt(i)])
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(msrs->controls[i].addr, val);
	}

	op_amd_start_ibs();
}

static void op_amd_stop(struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	/*
	 * Subtle: stop all counters to avoid a race with setting our
	 * pm callback
	 */
	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[op_x86_phys_to_virt(i)])
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(msrs->controls[i].addr, val);
	}

	op_amd_stop_ibs();
}

/*
 * check and reserve APIC extended interrupt LVT offset for IBS if
 * available
 */

static void init_ibs(void)
{
	ibs_caps = get_ibs_caps();

	if (!ibs_caps)
		return;

	printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
}
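
/*
 * get_ibs_caps() is provided by the perf IBS support; it probes the
 * IBS capabilities CPUID leaf (0x8000001b) and handles the
 * extended-interrupt LVT offset reservation referred to above.
 */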

static int (*create_arch_files)(struct dentry *root);

static int setup_ibs_files(struct dentry *root)
{
	struct dentry *dir;
	int ret = 0;

	/* architecture specific files */
	if (create_arch_files)
		ret = create_arch_files(root);

	if (ret)
		return ret;

	if (!ibs_caps)
		return ret;

	/* model specific files */

	/* setup some reasonable defaults */
	memset(&ibs_config, 0, sizeof(ibs_config));
	ibs_config.max_cnt_fetch = 250000;
	ibs_config.max_cnt_op = 250000;

	if (ibs_caps & IBS_CAPS_FETCHSAM) {
		dir = oprofilefs_mkdir(root, "ibs_fetch");
		oprofilefs_create_ulong(dir, "enable",
					&ibs_config.fetch_enabled);
		oprofilefs_create_ulong(dir, "max_count",
					&ibs_config.max_cnt_fetch);
		oprofilefs_create_ulong(dir, "rand_enable",
					&ibs_config.rand_en);
	}

	if (ibs_caps & IBS_CAPS_OPSAM) {
		dir = oprofilefs_mkdir(root, "ibs_op");
		oprofilefs_create_ulong(dir, "enable",
					&ibs_config.op_enabled);
		oprofilefs_create_ulong(dir, "max_count",
					&ibs_config.max_cnt_op);
		if (ibs_caps & IBS_CAPS_OPCNT)
			oprofilefs_create_ulong(dir, "dispatched_ops",
						&ibs_config.dispatched_ops);
		if (ibs_caps & IBS_CAPS_BRNTRGT)
			oprofilefs_create_ulong(dir, "branch_target",
						&ibs_config.branch_target);
	}

	return 0;
}
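
/*
 * Resulting layout (illustrative), relative to the oprofilefs mount
 * point (conventionally /dev/oprofile), with all capabilities present:
 *
 *   ibs_fetch/enable, ibs_fetch/max_count, ibs_fetch/rand_enable
 *   ibs_op/enable, ibs_op/max_count, ibs_op/dispatched_ops,
 *   ibs_op/branch_target
 */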

struct op_x86_model_spec op_amd_spec;

static int op_amd_init(struct oprofile_operations *ops)
{
	init_ibs();
	create_arch_files = ops->create_files;
	ops->create_files = setup_ibs_files;

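	/*
	 * Family 15h provides the core performance counter extensions
	 * (six CTL/CTR pairs); everything older uses the four legacy
	 * K7-style counters.
	 */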
	if (boot_cpu_data.x86 == 0x15) {
		num_counters = AMD64_NUM_COUNTERS_CORE;
	} else {
		num_counters = AMD64_NUM_COUNTERS;
	}

	op_amd_spec.num_counters = num_counters;
	op_amd_spec.num_controls = num_counters;
	op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);

	return 0;
}

struct op_x86_model_spec op_amd_spec = {
	/* num_counters/num_controls filled in at runtime */
	.reserved		= MSR_AMD_EVENTSEL_RESERVED,
	.event_mask		= OP_EVENT_MASK,
	.init			= op_amd_init,
	.fill_in_addresses	= &op_amd_fill_in_addresses,
	.setup_ctrs		= &op_amd_setup_ctrs,
	.check_ctrs		= &op_amd_check_ctrs,
	.start			= &op_amd_start,
	.stop			= &op_amd_stop,
	.shutdown		= &op_amd_shutdown,
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
	.switch_ctrl		= &op_mux_switch_ctrl,
#endif
};