/**
 * @file op_pmu.c
 * Setup and handling of IA64 Performance Monitoring Unit (PMU)
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author Bob Montgomery
 * @author Will Cohen
 * @author John Levon
 * @author Philippe Elie
 */


#include "oprofile.h"
#include "op_util.h"
#include <asm/perfmon.h>
#include "op_ia64_model.h"

/* number of counters physically present */
static uint op_nr_counters = 4;

/* performance counters are in pairs: pmcN and pmdN.  The pmc register acts
 * as the event selection; the pmd register is the counter. */
#define perf_reg(c)	((c)+4)

#define IA64_1_PMD_MASK_VAL	((1UL << 32) - 1)
#define IA64_2_PMD_MASK_VAL	((1UL << 47) - 1)
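/* pmds implement 32 counting bits on Itanium and 47 on Itanium 2;
 * values written to a pmd are masked to the implemented width */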

/* The appropriate value is selected in pmu_init() */
unsigned long pmd_mask = IA64_2_PMD_MASK_VAL;

#define pmd_overflowed(r, c) ((r) & (1 << perf_reg(c)))
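/* a counter is loaded with the negative of its count: it counts
 * upwards and raises the overflow interrupt after "count" events */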
#define set_pmd_neg(v, c) do { \
	ia64_set_pmd(perf_reg(c), -(ulong)(v) & pmd_mask); \
	ia64_srlz_d(); } while (0)
#define set_pmd(v, c) do { \
	ia64_set_pmd(perf_reg(c), (v) & pmd_mask); \
	ia64_srlz_d(); } while (0)
#define set_pmc(v, c) do { ia64_set_pmc(perf_reg(c), (v)); ia64_srlz_d(); } while (0)
#define get_pmd(c) ia64_get_pmd(perf_reg(c))
#define get_pmc(c) ia64_get_pmc(perf_reg(c))

/* ---------------- IRQ handler ------------------ */

/* The args match the args for pfm_overflow_handler in perfmon.c.
 * The task_struct is currently filled in with the perfmon "owner" of
 * the PMU.  This might change.  I'm not sure it makes sense in perfmon
 * either with system-wide profiling.
 * pmc0 is a bit mask for overflowed counters (bits 4-7).
 * The perfmon handler this mirrors returns 0 to resume interrupts.
 */
inline static void
op_do_pmu_interrupt(u64 pmc0, struct pt_regs * regs)
{
	uint cpu = op_cpu_id();
	int ctr;

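	/* record a sample for each overflowed counter, then re-arm it
	 * with the negative of its reset count so it overflows again
	 * after "count" more events */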
	for (ctr = 0 ; ctr < op_nr_counters ; ++ctr) {
		if (pmd_overflowed(pmc0, ctr)) {
			op_do_profile(cpu, regs->cr_iip, 1, ctr);
			set_pmd_neg(oprof_data[cpu].ctr_count[ctr], ctr);
		}
	}
	return;
}


static void
op_raw_pmu_interrupt(int irq, void * arg, struct pt_regs * regs)
{
	u64 pmc0;

	pmc0 = ia64_get_pmc(0);

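	/* bits 4-7 of pmc0 flag the overflowed counters; bit 0 is the
	 * PMU freeze bit.  Writing 0 to pmc0 unfreezes the PMU. */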
	if ((pmc0 & ~0x1UL) != 0UL) {
		op_do_pmu_interrupt(pmc0, regs);
		ia64_set_pmc(0, 0);
		ia64_srlz_d();
	}
}


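/* use a vector distinct from perfmon's so the two handlers do not
 * clash; cr.pmv is pointed at it on every CPU in install_handler() */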
#define MY_OPROFILE_VECTOR (IA64_PERFMON_VECTOR - 2)

static void
op_set_pmv(void * dummy)
{
	ia64_set_pmv(MY_OPROFILE_VECTOR);
	ia64_srlz_d();
}


static void
op_restore_pmv(void * dummy)
{
	ia64_set_pmv(IA64_PERFMON_VECTOR);
	ia64_srlz_d();
}


static int
install_handler(void)
{
	int err = 0;

	/* Try it legally - confusion about vec vs irq */
	err = request_irq(MY_OPROFILE_VECTOR, op_raw_pmu_interrupt,
			SA_INTERRUPT | SA_PERCPU_IRQ, "oprofile", NULL);

	if (err) {
		printk(KERN_ALERT "oprofile_IA64: request_irq fails, "
				"returns %d\n", err);
		return err;
	}

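	/* point cr.pmv at our vector on the other CPUs;
	 * smp_call_function() does not run the handler on the calling
	 * CPU, so call op_set_pmv() directly afterwards */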
	if ((smp_call_function(op_set_pmv, NULL, 0, 1))) {
		printk(KERN_ALERT "oprofile_IA64: unexpected failure "
				"of smp_call_function(op_set_pmv)\n");
	}

	op_set_pmv(NULL);

	return err;
}


static int
restore_handler(void)
{
	int err = 0;

	if ((smp_call_function(op_restore_pmv, NULL, 0, 1))) {
		printk(KERN_ALERT "oprofile_IA64: unexpected failure "
				"of smp_call_function(op_restore_pmv)\n");
	}

	op_restore_pmv(NULL);

	free_irq(MY_OPROFILE_VECTOR, NULL);
	return err;
}


/* ---------------- PMU setup ------------------ */

/* This is kind of artificial.  The proc interface might really want to
 * accept register values directly.  There are other features not exposed
 * by this limited interface.  Of course that might require all sorts of
 * validity checking? */
static void
pmc_fill_in(ulong * val, u8 kernel, u8 user, u8 event, u8 um)
{
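	/* pmc bit usage below:
	 *   bit 0      plm: count at privilege level 0 (kernel)
	 *   bit 3      plm: count at privilege level 3 (user)
	 *   bit 5      oi:  overflow interrupt enable
	 *   bit 6      pm:  privileged monitor
	 *   bits 8-15  event select
	 *   bits 16-19 unit mask
	 *   bit 23     enable (required for pmc4 on McKinley)
	 */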
	/* enable interrupt generation */
	*val |= (1 << 5);

	/* setup as a privileged monitor */
	*val |= (1 << 6);

	/* McKinley requires pmc4 to have bit 23 set (enable PMU).
	 * It is supposedly ignored in other pmc registers.
	 * Try assuming it's ignored in Itanium, too, and just
	 * set it for everyone.
	 */
	*val |= (1 << 23);

	/* enable/disable chosen OS and USR counting */
	(user)   ? (*val |= (1 << 3))
		 : (*val &= ~(1 << 3));

	(kernel) ? (*val |= (1 << 0))
		 : (*val &= ~(1 << 0));

	/* what are we counting ? */
	*val &= ~(0xff << 8);
	*val |= ((event & 0xff) << 8);
	*val &= ~(0xf << 16);
	*val |= ((um & 0xf) << 16);
}


static void
pmu_setup(void * dummy)
{
	ulong pmc_val;
	int ii;

	/* setup each counter */
	for (ii = 0 ; ii < op_nr_counters ; ++ii) {
		if (sysctl.ctr[ii].enabled) {
			pmc_val = 0;

			set_pmd_neg(sysctl.ctr[ii].count, ii);
			pmc_fill_in(&pmc_val, sysctl.ctr[ii].kernel,
				sysctl.ctr[ii].user, sysctl.ctr[ii].event,
				sysctl.ctr[ii].unit_mask);

			set_pmc(pmc_val, ii);
		}
	}
}


void
disable_psr(void * dummy)
{
	struct pt_regs * regs;
	/* disable profiling for my saved state */
	regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET);
	regs--;
	ia64_psr(regs)->pp = 0;
	/* shouldn't need to */
	ia64_psr(regs)->up = 0;

	/* disable profiling for my current state */
	__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");

#if defined(CONFIG_PERFMON) && defined(CONFIG_SMP)
#if V_AT_LEAST(2, 4, 21)
	local_cpu_data->pfm_syst_info |=  PFM_CPUINFO_SYST_WIDE;
	local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_DCR_PP;
	/* FIXME: what to do with the 3rd flag, PFM_CPUINFO_EXCL_IDLE (0x4)? */
#else
	/* disable profiling for everyone else */
	local_cpu_data->pfm_syst_wide = 1;
	local_cpu_data->pfm_dcr_pp = 0;
#endif
#endif
	ia64_set_pmc(0, 0);
	ia64_srlz_d();
}


static int
pmu_setup_all(void)
{

	/* This would be a great place to reserve all cpus with
	 * some sort of call to perfmonctl (something like the
	 * CREATE_CONTEXT command).  The current interface to
	 * perfmonctl wants to be called from a different task id
	 * for each CPU to be set up (and doesn't allow calls from
	 * modules).
	 */

	/* disable profiling with the psr.pp bit */
	if ((smp_call_function(disable_psr, NULL, 0, 1)))
		return -EFAULT;

	disable_psr(NULL);

	/* now I've reserved the PMUs and they should be quiet */

	if ((smp_call_function(pmu_setup, NULL, 0, 1)))
		return -EFAULT;

	pmu_setup(NULL);
	return 0;
}


#ifndef CONFIG_SMP
/* from linux/arch/ia64/kernel/perfmon.c */
/*
 * Originally written by Ganesh Venkitachalam, IBM Corp.
 * Copyright (C) 1999 Ganesh Venkitachalam <venkitac@us.ibm.com>
 *
 * Modifications by Stephane Eranian, Hewlett-Packard Co.
 * Modifications by David Mosberger-Tang, Hewlett-Packard Co.
 *
 * Copyright (C) 1999-2002  Hewlett Packard Co
 *               Stephane Eranian <eranian@hpl.hp.com>
 *               David Mosberger-Tang <davidm@hpl.hp.com>
 */

/*
 * On UP kernels, we do not need to constantly set the psr.pp bit
 * when a task is scheduled. The psr.pp bit can only be changed in
 * the kernel because of a user request. Given we are on a UP
 * non-preemptive kernel, we know that no other task is running, so we
 * can simply update their psr.pp from their saved state. There is thus
 * no impact on the context switch code compared to the SMP case.
 */
static void
op_tasklist_toggle_pp(unsigned int val)
{
	struct task_struct * p;
	struct pt_regs * regs;

	read_lock(&tasklist_lock);

	for_each_task(p) {
		regs = (struct pt_regs *)((unsigned long) p + IA64_STK_OFFSET);

		/*
		 * position on pt_regs saved on stack on 1st entry into the kernel
		 */
		regs--;

		/*
		 * update psr.pp
		 */
		ia64_psr(regs)->pp = val;
	}
	read_unlock(&tasklist_lock);
}
#endif


static void
pmu_start(void * info)
{
	struct pt_regs * regs;

	if (info && (*((uint *)info) != op_cpu_id()))
		return;

	/* printk(KERN_ALERT "oprofile_IA64: pmu_start on cpu %d\n",
	  	op_cpu_id()); */
	/* The default control register pp value is copied into psr.pp
	 * on an interrupt.  This allows interrupt service routines to
	 * be monitored.
	 */
	ia64_set_dcr(ia64_get_dcr() | IA64_DCR_PP);

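	/* tell perfmon's context-switch code that system-wide
	 * monitoring is active so psr.pp/dcr.pp survive task switches */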
#ifdef CONFIG_PERFMON
#ifdef CONFIG_SMP
#if V_AT_LEAST(2, 4, 21)
	local_cpu_data->pfm_syst_info |= PFM_CPUINFO_SYST_WIDE;
	local_cpu_data->pfm_syst_info |= PFM_CPUINFO_DCR_PP;
	/* FIXME: what to do with the 3rd flag, PFM_CPUINFO_EXCL_IDLE (0x4)? */
#else
	local_cpu_data->pfm_syst_wide = 1;
	local_cpu_data->pfm_dcr_pp = 1;
#endif
#else
	op_tasklist_toggle_pp(1);
#endif
#endif
	/* set it in my saved state */
	regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET);
	regs--;
	ia64_psr(regs)->pp = 1;

	/* set it in my current state */
	__asm__ __volatile__ ("ssm psr.pp;;"::: "memory");
	ia64_srlz_d();
}


static void
pmu_stop(void * info)
{
	struct pt_regs * regs;

	if (info && (*((uint *)info) != op_cpu_id()))
		return;

	/* stop in my current state */
	__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");

	/* disable the dcr pp */
	ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP);

#ifdef CONFIG_PERFMON
#ifdef CONFIG_SMP
#if V_AT_LEAST(2, 4, 21)
	local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_SYST_WIDE;
	local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_DCR_PP;
	/* FIXME: what to do with the 3rd flag, PFM_CPUINFO_EXCL_IDLE (0x4)? */
#else
	local_cpu_data->pfm_syst_wide = 0;
	local_cpu_data->pfm_dcr_pp = 0;
#endif
#else
	op_tasklist_toggle_pp(0);
#endif
#endif

	/* disable in my saved state */
	regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET);
	regs--;
	ia64_psr(regs)->pp = 0;
}


static void
pmu_select_start(uint cpu)
{
	if (cpu == op_cpu_id())
		pmu_start(NULL);
	else
		smp_call_function(pmu_start, &cpu, 0, 1);
}


static void
pmu_select_stop(uint cpu)
{
	if (cpu == op_cpu_id())
		pmu_stop(NULL);
	else
		smp_call_function(pmu_stop, &cpu, 0, 1);
}


static void
pmu_start_all(void)
{
	int cpu, i;

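	/* seed each CPU's working copy of the counter reset values
	 * from the sysctl parameters */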
	for (cpu = 0; cpu < smp_num_cpus; cpu++) {
		struct _oprof_data * data = &oprof_data[cpu];

		for (i = 0 ; i < op_nr_counters ; ++i) {
			if (sysctl.ctr[i].enabled) {
				data->ctr_count[i] = sysctl.ctr[i].count;
			} else {
				data->ctr_count[i] = 0;
			}
		}
	}

	if (!install_handler()) {
		smp_call_function(pmu_start, NULL, 0, 1);
		pmu_start(NULL);
	}
	/* FIXME: need some way to fail here */
}


static void
pmu_stop_all(void)
{
	smp_call_function(pmu_stop, NULL, 0, 1);
	pmu_stop(NULL);
	restore_handler();
}


static int
pmu_check_params(void)
{
	int i;
	int enabled = 0;

	for (i = 0; i < op_nr_counters ; i++) {
		if (!sysctl.ctr[i].enabled)
			continue;

		enabled = 1;

		if (!sysctl.ctr[i].user && !sysctl.ctr[i].kernel) {
			printk(KERN_ERR "oprofile: neither kernel nor user "
			       "set for counter %d\n", i);
			return -EINVAL;
		}

		if (check_range(sysctl.ctr[i].count, 1, OP_MAX_PERF_COUNT,
			"ctr count value %d not in range (%d %ld)\n"))
			return -EINVAL;
	}

	if (!enabled) {
		printk(KERN_ERR "oprofile: no counters have been enabled.\n");
		return -EINVAL;
	}

	return 0;
}


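/* per-CPU pmc/pmd values saved at init time and restored at deinit */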
static struct op_msrs cpu_msrs[NR_CPUS];


static void free_msr_group(struct op_msr_group * group)
{
	if (group->addrs)
		kfree(group->addrs);
	if (group->saved)
		kfree(group->saved);
	group->addrs = NULL;
	group->saved = NULL;
}


static void pmu_save_registers(void * dummy)
{
	uint i;
	uint const cpu = op_cpu_id();
	struct op_msr_group * counters = &cpu_msrs[cpu].counters;
	struct op_msr_group * controls = &cpu_msrs[cpu].controls;

	counters->addrs = NULL;
	counters->saved = NULL;
	controls->addrs = NULL;
	controls->saved = NULL;

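	/* only the "saved" arrays are used on IA64; "addrs" stays NULL
	 * because the registers are reached via perf_reg() indices,
	 * not MSR addresses */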
	counters->saved = kmalloc(
		op_nr_counters * sizeof(struct op_saved_msr), GFP_KERNEL);
	if (!counters->saved)
		goto fault;

	controls->saved = kmalloc(
		op_nr_counters * sizeof(struct op_saved_msr), GFP_KERNEL);
	if (!controls->saved)
		goto fault;

	for (i = 0; i < op_nr_counters; ++i) {
		controls->saved[i].low = get_pmc(i);
		counters->saved[i].low = get_pmd(i);
	}
	return;

fault:
	free_msr_group(counters);
	free_msr_group(controls);
}


static void pmu_restore_registers(void * dummy)
{
	uint i;
	uint const cpu = op_cpu_id();
	struct op_msr_group * counters = &cpu_msrs[cpu].counters;
	struct op_msr_group * controls = &cpu_msrs[cpu].controls;

	for (i = 0; i < op_nr_counters; ++i) {
		set_pmc(controls->saved[i].low, i);
		set_pmd(counters->saved[i].low, i);
	}

	free_msr_group(counters);
	free_msr_group(controls);
}


static int
pmu_init(void)
{
	int err = 0;

	/* figure out the processor type and configure the number of
	   bits in each pmd and the number of counters */
	switch (get_cpu_type()) {
	case CPU_IA64_1:
		pmd_mask = IA64_1_PMD_MASK_VAL; break;
	case CPU_IA64_2:
	case CPU_IA64:
		pmd_mask = IA64_2_PMD_MASK_VAL; break;
	default:
		err = -EIO; break;
	}

	op_nr_counters = 4;

	if ((err = smp_call_function(pmu_save_registers, NULL, 0, 1)))
		goto out;

	pmu_save_registers(NULL);

out:
	return err;
}


static void
pmu_deinit(void)
{
	smp_call_function(pmu_restore_registers, NULL, 0, 1);
	pmu_restore_registers(NULL);
}


static char * names[] = { "0", "1", "2", "3", };


static int
pmu_add_sysctls(ctl_table * next)
{
	ctl_table * start = next;
	ctl_table * tab;
	int i, j;

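	/* build one /proc/sys directory per counter, each holding the
	 * six per-counter parameter files */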
	for (i = 0; i < op_nr_counters; i++) {
		next->ctl_name = 1;
		next->procname = names[i];
		next->mode = 0700;

		if (!(tab = kmalloc(sizeof(ctl_table)*7, GFP_KERNEL)))
			goto cleanup;

		next->child = tab;

		memset(tab, 0, sizeof(ctl_table)*7);
		tab[0] = ((ctl_table) { 1, "enabled", &sysctl_parms.ctr[i].enabled, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[1] = ((ctl_table) { 1, "event", &sysctl_parms.ctr[i].event, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[2] = ((ctl_table) { 1, "count", &sysctl_parms.ctr[i].count, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[3] = ((ctl_table) { 1, "unit_mask", &sysctl_parms.ctr[i].unit_mask, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[4] = ((ctl_table) { 1, "kernel", &sysctl_parms.ctr[i].kernel, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[5] = ((ctl_table) { 1, "user", &sysctl_parms.ctr[i].user, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		next++;
	}

	return 0;

cleanup:
	next = start;
	for (j = 0; j < i; j++) {
		kfree(next->child);
		next++;
	}
	return -EFAULT;
}


static void pmu_remove_sysctls(ctl_table * next)
{
	int ii;

	for (ii = 0; ii < op_nr_counters; ii++) {
		kfree(next->child);
		next++;
	}
}


struct op_int_operations op_nmi_ops = {
	init: pmu_init,
	deinit: pmu_deinit,
	add_sysctls: pmu_add_sysctls,
	remove_sysctls: pmu_remove_sysctls,
	check_params: pmu_check_params,
	setup: pmu_setup_all,
	start: pmu_start_all,
	stop: pmu_stop_all,
	start_cpu: pmu_select_start,
	stop_cpu: pmu_select_stop,
};


struct op_int_operations const * op_int_interface()
{
	return &op_nmi_ops;
}

/* Need this dummy so module/oprofile.c links */
struct op_int_operations op_rtc_ops = {
	init: NULL,
	deinit: NULL,
	add_sysctls: NULL,
	remove_sysctls: NULL,
	check_params: NULL,
	setup: NULL,
	start: NULL,
	stop: NULL,
	start_cpu: NULL,
	stop_cpu: NULL,
};