1 /*
2  * local apic based NMI watchdog for various CPUs.
3  *
4  * This file also handles reservation of performance counters for coordination
5  * with other users (like oprofile).
6  *
7  * Note that these events normally don't tick when the CPU idles. This means
8  * the frequency varies with CPU load.
9  *
10  * Original code for K7/P6 written by Keith Owens
11  *
12  */
13 
14 #include <linux/percpu.h>
15 #include <linux/module.h>
16 #include <linux/kernel.h>
17 #include <linux/bitops.h>
18 #include <linux/smp.h>
19 #include <linux/nmi.h>
20 #include <linux/kprobes.h>
21 
22 #include <asm/apic.h>
23 #include <asm/intel_arch_perfmon.h>
24 
25 struct nmi_watchdog_ctlblk {
26 	unsigned int cccr_msr;
27 	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
28 	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
29 };
30 
31 /* Interface defining a CPU specific perfctr watchdog */
32 struct wd_ops {
33 	int (*reserve)(void);
34 	void (*unreserve)(void);
35 	int (*setup)(unsigned nmi_hz);
36 	void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
37 	void (*stop)(void);
38 	unsigned perfctr;
39 	unsigned evntsel;
40 	u64 checkbit;
41 };
42 
43 static const struct wd_ops *wd_ops;
44 
45 /*
46  * This number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
47  * offset from MSR_P4_BSU_ESCR0.
48  *
49  * It will be the max for all platforms (for now)
50  */
51 #define NMI_MAX_COUNTER_BITS 66
52 
53 /*
54  * perfctr_nmi_owner tracks the ownership of the perfctr registers;
55  * evntsel_nmi_owner tracks the ownership of the event selection registers.
56  * Different performance counters/event selection registers may be reserved
57  * by different subsystems; this reservation system just tries to coordinate
58  * things a little.
59  */
60 static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
61 static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
62 
63 static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
64 
65 /* converts an msr to an appropriate reservation bit */
66 static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
67 {
68 	/* returns the bit offset of the performance counter register */
69 	switch (boot_cpu_data.x86_vendor) {
70 	case X86_VENDOR_AMD:
71 		return (msr - MSR_K7_PERFCTR0);
72 	case X86_VENDOR_INTEL:
73 		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
74 			return (msr - MSR_ARCH_PERFMON_PERFCTR0);
75 
76 		switch (boot_cpu_data.x86) {
77 		case 6:
78 			return (msr - MSR_P6_PERFCTR0);
79 		case 15:
80 			return (msr - MSR_P4_BPU_PERFCTR0);
81 		}
82 	}
83 	return 0;
84 }
85 
86 /*
87  * converts an msr to an appropriate reservation bit
88  * returns the bit offset of the event selection register
89  */
90 static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
91 {
92 	/* returns the bit offset of the event selection register */
93 	switch (boot_cpu_data.x86_vendor) {
94 	case X86_VENDOR_AMD:
95 		return (msr - MSR_K7_EVNTSEL0);
96 	case X86_VENDOR_INTEL:
97 		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
98 			return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
99 
100 		switch (boot_cpu_data.x86) {
101 		case 6:
102 			return (msr - MSR_P6_EVNTSEL0);
103 		case 15:
104 			return (msr - MSR_P4_BSU_ESCR0);
105 		}
106 	}
107 	return 0;
108 
109 }
110 
111 /* checks a counter bit for availability (hack for oprofile) */
112 int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
113 {
114 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
115 
116 	return (!test_bit(counter, perfctr_nmi_owner));
117 }
118 
119 /* checks an msr for availability */
120 int avail_to_resrv_perfctr_nmi(unsigned int msr)
121 {
122 	unsigned int counter;
123 
124 	counter = nmi_perfctr_msr_to_bit(msr);
125 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
126 
127 	return (!test_bit(counter, perfctr_nmi_owner));
128 }
129 EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
130 
131 int reserve_perfctr_nmi(unsigned int msr)
132 {
133 	unsigned int counter;
134 
135 	counter = nmi_perfctr_msr_to_bit(msr);
136 	/* register not managed by the allocator? */
137 	if (counter > NMI_MAX_COUNTER_BITS)
138 		return 1;
139 
140 	if (!test_and_set_bit(counter, perfctr_nmi_owner))
141 		return 1;
142 	return 0;
143 }
144 EXPORT_SYMBOL(reserve_perfctr_nmi);
145 
146 void release_perfctr_nmi(unsigned int msr)
147 {
148 	unsigned int counter;
149 
150 	counter = nmi_perfctr_msr_to_bit(msr);
151 	/* register not managed by the allocator? */
152 	if (counter > NMI_MAX_COUNTER_BITS)
153 		return;
154 
155 	clear_bit(counter, perfctr_nmi_owner);
156 }
157 EXPORT_SYMBOL(release_perfctr_nmi);
158 
159 int reserve_evntsel_nmi(unsigned int msr)
160 {
161 	unsigned int counter;
162 
163 	counter = nmi_evntsel_msr_to_bit(msr);
164 	/* register not managed by the allocator? */
165 	if (counter > NMI_MAX_COUNTER_BITS)
166 		return 1;
167 
168 	if (!test_and_set_bit(counter, evntsel_nmi_owner))
169 		return 1;
170 	return 0;
171 }
172 EXPORT_SYMBOL(reserve_evntsel_nmi);
173 
174 void release_evntsel_nmi(unsigned int msr)
175 {
176 	unsigned int counter;
177 
178 	counter = nmi_evntsel_msr_to_bit(msr);
179 	/* register not managed by the allocator? */
180 	if (counter > NMI_MAX_COUNTER_BITS)
181 		return;
182 
183 	clear_bit(counter, evntsel_nmi_owner);
184 }
185 EXPORT_SYMBOL(release_evntsel_nmi);
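
/*
 * Usage sketch for an external perfctr user such as oprofile; illustrative
 * only, and the K7 MSRs below are just an example of whatever counter the
 * caller actually wants to program:
 *
 *	if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0))
 *		return 0;
 *	if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) {
 *		release_perfctr_nmi(MSR_K7_PERFCTR0);
 *		return 0;
 *	}
 *	... program and use the counter ...
 *	release_evntsel_nmi(MSR_K7_EVNTSEL0);
 *	release_perfctr_nmi(MSR_K7_PERFCTR0);
 *
 * The reserve functions return 1 on success and 0 when the register is
 * already owned by somebody else (for instance this watchdog).
 */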
186 
187 void disable_lapic_nmi_watchdog(void)
188 {
189 	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
190 
191 	if (atomic_read(&nmi_active) <= 0)
192 		return;
193 
194 	on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
195 
196 	if (wd_ops)
197 		wd_ops->unreserve();
198 
199 	BUG_ON(atomic_read(&nmi_active) != 0);
200 }
201 
202 void enable_lapic_nmi_watchdog(void)
203 {
204 	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
205 
206 	/* are we already enabled */
207 	if (atomic_read(&nmi_active) != 0)
208 		return;
209 
210 	/* are we lapic aware */
211 	if (!wd_ops)
212 		return;
213 	if (!wd_ops->reserve()) {
214 		printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
215 		return;
216 	}
217 
218 	on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
219 	touch_nmi_watchdog();
220 }
221 
222 /*
223  * Activate the NMI watchdog via the local APIC.
224  */
225 
226 static unsigned int adjust_for_32bit_ctr(unsigned int hz)
227 {
228 	u64 counter_val;
229 	unsigned int retval = hz;
230 
231 	/*
232 	 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
233 	 * are writable; the higher bits are sign-extended from bit 31.
234 	 * So we can only program the counter with 31-bit values, and
235 	 * bit 31 must be set so that bits 32 and above extend to 1.
236 	 * Find the appropriate nmi_hz.
237 	 */
238 	counter_val = (u64)cpu_khz * 1000;
239 	do_div(counter_val, retval);
240  	if (counter_val > 0x7fffffffULL) {
241 		u64 count = (u64)cpu_khz * 1000;
242 		do_div(count, 0x7fffffffUL);
243 		retval = count + 1;
244 	}
245 	return retval;
246 }
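
/*
 * Worked example (numbers are illustrative): with cpu_khz = 3200000
 * (a 3.2 GHz CPU) and nmi_hz = 1, counter_val is 3.2e9, which exceeds
 * 0x7fffffff (~2.147e9).  The fallback then computes
 * 3200000000 / 0x7fffffff = 1 in integer division, so retval becomes 2
 * and the watchdog ticks twice per second instead of once, keeping the
 * programmed period within 31 bits.
 */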
247 
248 static void write_watchdog_counter(unsigned int perfctr_msr,
249 				const char *descr, unsigned nmi_hz)
250 {
251 	u64 count = (u64)cpu_khz * 1000;
252 
253 	do_div(count, nmi_hz);
254 	if (descr)
255 		pr_debug("setting %s to -0x%08Lx\n", descr, count);
256 	wrmsrl(perfctr_msr, 0 - count);
257 }
258 
259 static void write_watchdog_counter32(unsigned int perfctr_msr,
260 				const char *descr, unsigned nmi_hz)
261 {
262 	u64 count = (u64)cpu_khz * 1000;
263 
264 	do_div(count, nmi_hz);
265 	if (descr)
266 		pr_debug("setting %s to -0x%08Lx\n", descr, count);
267 	wrmsr(perfctr_msr, (u32)(-count), 0);
268 }
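
/*
 * Both helpers program the counter with the negated period: the counter
 * counts up, and the overflow from -count back to 0 is what raises the
 * PMI, which is delivered as an NMI because APIC_LVTPC is set to
 * APIC_DM_NMI in the setup routines below.  That gives roughly nmi_hz
 * overflows per second while the CPU is busy.  The 32-bit variant exists
 * because P6/architectural counters only accept 32-bit writes (see
 * adjust_for_32bit_ctr() above).
 */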
269 
270 /*
271  * AMD K7/K8/Family10h/Family11h support.
272  * AMD keeps this interface nicely stable so there is not much variety
273  */
274 #define K7_EVNTSEL_ENABLE	(1 << 22)
275 #define K7_EVNTSEL_INT		(1 << 20)
276 #define K7_EVNTSEL_OS		(1 << 17)
277 #define K7_EVNTSEL_USR		(1 << 16)
278 #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
279 #define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
280 
281 static int setup_k7_watchdog(unsigned nmi_hz)
282 {
283 	unsigned int perfctr_msr, evntsel_msr;
284 	unsigned int evntsel;
285 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
286 
287 	perfctr_msr = wd_ops->perfctr;
288 	evntsel_msr = wd_ops->evntsel;
289 
290 	wrmsrl(perfctr_msr, 0UL);
291 
292 	evntsel = K7_EVNTSEL_INT
293 		| K7_EVNTSEL_OS
294 		| K7_EVNTSEL_USR
295 		| K7_NMI_EVENT;
296 
297 	/* setup the timer */
298 	wrmsr(evntsel_msr, evntsel, 0);
299 	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
300 
301 	/* initialize the wd struct before enabling */
302 	wd->perfctr_msr = perfctr_msr;
303 	wd->evntsel_msr = evntsel_msr;
304 	wd->cccr_msr = 0;  /* unused */
305 
306 	/* ok, everything is initialized, announce that we're set */
307 	cpu_nmi_set_wd_enabled();
308 
309 	apic_write(APIC_LVTPC, APIC_DM_NMI);
310 	evntsel |= K7_EVNTSEL_ENABLE;
311 	wrmsr(evntsel_msr, evntsel, 0);
312 
313 	return 1;
314 }
315 
316 static void single_msr_stop_watchdog(void)
317 {
318 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
319 
320 	wrmsr(wd->evntsel_msr, 0, 0);
321 }
322 
323 static int single_msr_reserve(void)
324 {
325 	if (!reserve_perfctr_nmi(wd_ops->perfctr))
326 		return 0;
327 
328 	if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
329 		release_perfctr_nmi(wd_ops->perfctr);
330 		return 0;
331 	}
332 	return 1;
333 }
334 
335 static void single_msr_unreserve(void)
336 {
337 	release_evntsel_nmi(wd_ops->evntsel);
338 	release_perfctr_nmi(wd_ops->perfctr);
339 }
340 
341 static void __kprobes
342 single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
343 {
344 	/* start the cycle over again */
345 	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
346 }
347 
348 static const struct wd_ops k7_wd_ops = {
349 	.reserve	= single_msr_reserve,
350 	.unreserve	= single_msr_unreserve,
351 	.setup		= setup_k7_watchdog,
352 	.rearm		= single_msr_rearm,
353 	.stop		= single_msr_stop_watchdog,
354 	.perfctr	= MSR_K7_PERFCTR0,
355 	.evntsel	= MSR_K7_EVNTSEL0,
356 	.checkbit	= 1ULL << 47,
357 };
358 
359 /*
360  * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
361  */
362 #define P6_EVNTSEL0_ENABLE	(1 << 22)
363 #define P6_EVNTSEL_INT		(1 << 20)
364 #define P6_EVNTSEL_OS		(1 << 17)
365 #define P6_EVNTSEL_USR		(1 << 16)
366 #define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
367 #define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
368 
369 static int setup_p6_watchdog(unsigned nmi_hz)
370 {
371 	unsigned int perfctr_msr, evntsel_msr;
372 	unsigned int evntsel;
373 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
374 
375 	perfctr_msr = wd_ops->perfctr;
376 	evntsel_msr = wd_ops->evntsel;
377 
378 	/* KVM doesn't implement this MSR */
379 	if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
380 		return 0;
381 
382 	evntsel = P6_EVNTSEL_INT
383 		| P6_EVNTSEL_OS
384 		| P6_EVNTSEL_USR
385 		| P6_NMI_EVENT;
386 
387 	/* setup the timer */
388 	wrmsr(evntsel_msr, evntsel, 0);
389 	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
390 	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
391 
392 	/* initialize the wd struct before enabling */
393 	wd->perfctr_msr = perfctr_msr;
394 	wd->evntsel_msr = evntsel_msr;
395 	wd->cccr_msr = 0;  /* unused */
396 
397 	/* ok, everything is initialized, announce that we're set */
398 	cpu_nmi_set_wd_enabled();
399 
400 	apic_write(APIC_LVTPC, APIC_DM_NMI);
401 	evntsel |= P6_EVNTSEL0_ENABLE;
402 	wrmsr(evntsel_msr, evntsel, 0);
403 
404 	return 1;
405 }
406 
407 static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
408 {
409 	/*
410 	 * P6-based Pentium M needs to re-unmask
411 	 * the apic vector, but it doesn't hurt
412 	 * other P6 variants.
413 	 * ArchPerfmon/Core Duo also needs this.
414 	 */
415 	apic_write(APIC_LVTPC, APIC_DM_NMI);
416 
417 	/* P6/ARCH_PERFMON has 32 bit counter write */
418 	write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
419 }
420 
421 static const struct wd_ops p6_wd_ops = {
422 	.reserve	= single_msr_reserve,
423 	.unreserve	= single_msr_unreserve,
424 	.setup		= setup_p6_watchdog,
425 	.rearm		= p6_rearm,
426 	.stop		= single_msr_stop_watchdog,
427 	.perfctr	= MSR_P6_PERFCTR0,
428 	.evntsel	= MSR_P6_EVNTSEL0,
429 	.checkbit	= 1ULL << 39,
430 };
431 
432 /*
433  * Intel P4 performance counters.
434  * By far the most complicated of all.
435  */
436 #define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1 << 7)
437 #define P4_ESCR_EVENT_SELECT(N)	((N) << 25)
438 #define P4_ESCR_OS		(1 << 3)
439 #define P4_ESCR_USR		(1 << 2)
440 #define P4_CCCR_OVF_PMI0	(1 << 26)
441 #define P4_CCCR_OVF_PMI1	(1 << 27)
442 #define P4_CCCR_THRESHOLD(N)	((N) << 20)
443 #define P4_CCCR_COMPLEMENT	(1 << 19)
444 #define P4_CCCR_COMPARE		(1 << 18)
445 #define P4_CCCR_REQUIRED	(3 << 16)
446 #define P4_CCCR_ESCR_SELECT(N)	((N) << 13)
447 #define P4_CCCR_ENABLE		(1 << 12)
448 #define P4_CCCR_OVF 		(1 << 31)
449 
450 #define P4_CONTROLS 18
451 static unsigned int p4_controls[P4_CONTROLS] = {
452 	MSR_P4_BPU_CCCR0,
453 	MSR_P4_BPU_CCCR1,
454 	MSR_P4_BPU_CCCR2,
455 	MSR_P4_BPU_CCCR3,
456 	MSR_P4_MS_CCCR0,
457 	MSR_P4_MS_CCCR1,
458 	MSR_P4_MS_CCCR2,
459 	MSR_P4_MS_CCCR3,
460 	MSR_P4_FLAME_CCCR0,
461 	MSR_P4_FLAME_CCCR1,
462 	MSR_P4_FLAME_CCCR2,
463 	MSR_P4_FLAME_CCCR3,
464 	MSR_P4_IQ_CCCR0,
465 	MSR_P4_IQ_CCCR1,
466 	MSR_P4_IQ_CCCR2,
467 	MSR_P4_IQ_CCCR3,
468 	MSR_P4_IQ_CCCR4,
469 	MSR_P4_IQ_CCCR5,
470 };
471 /*
472  * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
473  * CRU_ESCR0 (with any non-null event selector) through a complemented
474  * max threshold. [IA32-Vol3, Section 14.9.9]
475  */
476 static int setup_p4_watchdog(unsigned nmi_hz)
477 {
478 	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
479 	unsigned int evntsel, cccr_val;
480 	unsigned int misc_enable, dummy;
481 	unsigned int ht_num;
482 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
483 
484 	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
485 	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
486 		return 0;
487 
488 #ifdef CONFIG_SMP
489 	/* detect which hyperthread we are on */
490 	if (smp_num_siblings == 2) {
491 		unsigned int ebx, apicid;
492 
493 		ebx = cpuid_ebx(1);
494 		apicid = (ebx >> 24) & 0xff;
495 		ht_num = apicid & 1;
496 	} else
497 #endif
498 		ht_num = 0;
499 
500 	/*
501 	 * performance counters are shared resources
502 	 * assign each hyperthread its own set
503 	 * (re-use the ESCR0 register, seems safe
504 	 * and keeps the cccr_val the same)
505 	 */
506 	if (!ht_num) {
507 		/* logical cpu 0 */
508 		perfctr_msr = MSR_P4_IQ_PERFCTR0;
509 		evntsel_msr = MSR_P4_CRU_ESCR0;
510 		cccr_msr = MSR_P4_IQ_CCCR0;
511 		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
512 
513 		/*
514 		 * If we're on the kdump kernel or other situation, we may
515 		 * still have other performance counter registers set to
516 		 * interrupt and they'll keep interrupting forever because
517 		 * of the P4_CCCR_OVF quirk. So we need to ACK all the
518 		 * pending interrupts and disable all the registers here,
519 		 * before reenabling the NMI delivery. Refer to p4_rearm()
520 		 * about the P4_CCCR_OVF quirk.
521 		 */
522 		if (reset_devices) {
523 			unsigned int low, high;
524 			int i;
525 
526 			for (i = 0; i < P4_CONTROLS; i++) {
527 				rdmsr(p4_controls[i], low, high);
528 				low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
529 				wrmsr(p4_controls[i], low, high);
530 			}
531 		}
532 	} else {
533 		/* logical cpu 1 */
534 		perfctr_msr = MSR_P4_IQ_PERFCTR1;
535 		evntsel_msr = MSR_P4_CRU_ESCR0;
536 		cccr_msr = MSR_P4_IQ_CCCR1;
537 
538 		/* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
539 		if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
540 			cccr_val = P4_CCCR_OVF_PMI0;
541 		else
542 			cccr_val = P4_CCCR_OVF_PMI1;
543 		cccr_val |= P4_CCCR_ESCR_SELECT(4);
544 	}
545 
546 	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
547 	 	| P4_ESCR_OS
548 		| P4_ESCR_USR;
549 
550 	cccr_val |= P4_CCCR_THRESHOLD(15)
551 		 | P4_CCCR_COMPLEMENT
552 		 | P4_CCCR_COMPARE
553 		 | P4_CCCR_REQUIRED;
554 
555 	wrmsr(evntsel_msr, evntsel, 0);
556 	wrmsr(cccr_msr, cccr_val, 0);
557 	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
558 
559 	wd->perfctr_msr = perfctr_msr;
560 	wd->evntsel_msr = evntsel_msr;
561 	wd->cccr_msr = cccr_msr;
562 
563 	/* ok, everything is initialized, announce that we're set */
564 	cpu_nmi_set_wd_enabled();
565 
566 	apic_write(APIC_LVTPC, APIC_DM_NMI);
567 	cccr_val |= P4_CCCR_ENABLE;
568 	wrmsr(cccr_msr, cccr_val, 0);
569 	return 1;
570 }
571 
572 static void stop_p4_watchdog(void)
573 {
574 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
575 	wrmsr(wd->cccr_msr, 0, 0);
576 	wrmsr(wd->evntsel_msr, 0, 0);
577 }
578 
579 static int p4_reserve(void)
580 {
581 	if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
582 		return 0;
583 #ifdef CONFIG_SMP
584 	if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
585 		goto fail1;
586 #endif
587 	if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
588 		goto fail2;
589 	/* RED-PEN why is ESCR1 not reserved here? */
590 	return 1;
591  fail2:
592 #ifdef CONFIG_SMP
593 	if (smp_num_siblings > 1)
594 		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
595  fail1:
596 #endif
597 	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
598 	return 0;
599 }
600 
601 static void p4_unreserve(void)
602 {
603 #ifdef CONFIG_SMP
604 	if (smp_num_siblings > 1)
605 		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
606 #endif
607 	release_evntsel_nmi(MSR_P4_CRU_ESCR0);
608 	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
609 }
610 
611 static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
612 {
613 	unsigned dummy;
614 	/*
615  	 * P4 quirks:
616 	 * - An overflown perfctr will assert its interrupt
617 	 *   until the OVF flag in its CCCR is cleared.
618 	 * - LVTPC is masked on interrupt and must be
619 	 *   unmasked by the LVTPC handler.
620 	 */
621 	rdmsrl(wd->cccr_msr, dummy);
622 	dummy &= ~P4_CCCR_OVF;
623 	wrmsrl(wd->cccr_msr, dummy);
624 	apic_write(APIC_LVTPC, APIC_DM_NMI);
625 	/* start the cycle over again */
626 	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
627 }
628 
629 static const struct wd_ops p4_wd_ops = {
630 	.reserve	= p4_reserve,
631 	.unreserve	= p4_unreserve,
632 	.setup		= setup_p4_watchdog,
633 	.rearm		= p4_rearm,
634 	.stop		= stop_p4_watchdog,
635 	/* RED-PEN this is wrong for the other sibling */
636 	.perfctr	= MSR_P4_BPU_PERFCTR0,
637 	.evntsel	= MSR_P4_BSU_ESCR0,
638 	.checkbit	= 1ULL << 39,
639 };
640 
641 /*
642  * Watchdog using the Intel architected PerfMon.
643  * Used for Core2 and hopefully all future Intel CPUs.
644  */
645 #define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
646 #define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
647 
648 static struct wd_ops intel_arch_wd_ops;
649 
650 static int setup_intel_arch_watchdog(unsigned nmi_hz)
651 {
652 	unsigned int ebx;
653 	union cpuid10_eax eax;
654 	unsigned int unused;
655 	unsigned int perfctr_msr, evntsel_msr;
656 	unsigned int evntsel;
657 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
658 
659 	/*
660 	 * Check whether the Architectural PerfMon supports
661 	 * Unhalted Core Cycles Event or not.
662 	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
663 	 */
664 	cpuid(10, &(eax.full), &ebx, &unused, &unused);
665 	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
666 	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
667 		return 0;
668 
669 	perfctr_msr = wd_ops->perfctr;
670 	evntsel_msr = wd_ops->evntsel;
671 
672 	wrmsrl(perfctr_msr, 0UL);
673 
674 	evntsel = ARCH_PERFMON_EVENTSEL_INT
675 		| ARCH_PERFMON_EVENTSEL_OS
676 		| ARCH_PERFMON_EVENTSEL_USR
677 		| ARCH_PERFMON_NMI_EVENT_SEL
678 		| ARCH_PERFMON_NMI_EVENT_UMASK;
679 
680 	/* setup the timer */
681 	wrmsr(evntsel_msr, evntsel, 0);
682 	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
683 	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
684 
685 	wd->perfctr_msr = perfctr_msr;
686 	wd->evntsel_msr = evntsel_msr;
687 	wd->cccr_msr = 0;  /* unused */
688 
689 	/* ok, everything is initialized, announce that we're set */
690 	cpu_nmi_set_wd_enabled();
691 
692 	apic_write(APIC_LVTPC, APIC_DM_NMI);
693 	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
694 	wrmsr(evntsel_msr, evntsel, 0);
695 	intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
696 	return 1;
697 }
698 
699 static struct wd_ops intel_arch_wd_ops __read_mostly = {
700 	.reserve	= single_msr_reserve,
701 	.unreserve	= single_msr_unreserve,
702 	.setup		= setup_intel_arch_watchdog,
703 	.rearm		= p6_rearm,
704 	.stop		= single_msr_stop_watchdog,
705 	.perfctr	= MSR_ARCH_PERFMON_PERFCTR1,
706 	.evntsel	= MSR_ARCH_PERFMON_EVENTSEL1,
707 };
708 
709 static void probe_nmi_watchdog(void)
710 {
711 	switch (boot_cpu_data.x86_vendor) {
712 	case X86_VENDOR_AMD:
713 		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
714 		    boot_cpu_data.x86 != 16)
715 			return;
716 		wd_ops = &k7_wd_ops;
717 		break;
718 	case X86_VENDOR_INTEL:
719 		/*
720 		 * Work around Core Duo (Yonah) errata AE49 where perfctr1
721 		 * doesn't have a working enable bit.
722 		 */
723 		if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
724 			intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
725 			intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
726 		}
727 		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
728 			wd_ops = &intel_arch_wd_ops;
729 			break;
730 		}
731 		switch (boot_cpu_data.x86) {
732 		case 6:
733 			if (boot_cpu_data.x86_model > 13)
734 				return;
735 
736 			wd_ops = &p6_wd_ops;
737 			break;
738 		case 15:
739 			wd_ops = &p4_wd_ops;
740 			break;
741 		default:
742 			return;
743 		}
744 		break;
745 	}
746 }
747 
748 /* Interface to nmi.c */
749 
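/*
 * Expected calling sequence (a sketch inferred from this file rather than
 * from nmi.c itself): lapic_watchdog_init() runs on each CPU to probe,
 * reserve and program a counter; the NMI handler calls lapic_wd_event()
 * on every unexplained NMI to check whether the watchdog counter
 * overflowed and, if so, to rearm it; lapic_watchdog_stop() quiesces the
 * counter again.
 */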
750 int lapic_watchdog_init(unsigned nmi_hz)
751 {
752 	if (!wd_ops) {
753 		probe_nmi_watchdog();
754 		if (!wd_ops) {
755 			printk(KERN_INFO "NMI watchdog: CPU not supported\n");
756 			return -1;
757 		}
758 
759 		if (!wd_ops->reserve()) {
760 			printk(KERN_ERR
761 				"NMI watchdog: cannot reserve perfctrs\n");
762 			return -1;
763 		}
764 	}
765 
766 	if (!(wd_ops->setup(nmi_hz))) {
767 		printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
768 		       raw_smp_processor_id());
769 		return -1;
770 	}
771 
772 	return 0;
773 }
774 
775 void lapic_watchdog_stop(void)
776 {
777 	if (wd_ops)
778 		wd_ops->stop();
779 }
780 
781 unsigned lapic_adjust_nmi_hz(unsigned hz)
782 {
783 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
784 	if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
785 	    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
786 		hz = adjust_for_32bit_ctr(hz);
787 	return hz;
788 }
789 
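/*
 * The checkbit test below works because the counter was loaded with a
 * negative value by write_watchdog_counter(): while it is still counting
 * up towards overflow, the top implemented counter bit (wd_ops->checkbit)
 * reads as 1; once the counter wraps past zero, that bit reads as 0,
 * telling us this NMI was ours and the counter must be rearmed.
 */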
790 int __kprobes lapic_wd_event(unsigned nmi_hz)
791 {
792 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
793 	u64 ctr;
794 
795 	rdmsrl(wd->perfctr_msr, ctr);
796 	if (ctr & wd_ops->checkbit) /* perfctr still running? */
797 		return 0;
798 
799 	wd_ops->rearm(wd, nmi_hz);
800 	return 1;
801 }
802 
803 int lapic_watchdog_ok(void)
804 {
805 	return wd_ops != NULL;
806 }
807