/**
 * @file op_pmu.c
 * Setup and handling of IA64 Performance Monitoring Unit (PMU)
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author Bob Montgomery
 * @author Will Cohen
 * @author John Levon
 * @author Philippe Elie
 */


#include "oprofile.h"
#include "op_util.h"
#include <asm/perfmon.h>
#include "op_ia64_model.h"

/* number of counters physically present */
static uint op_nr_counters = 4;

/* performance counters come in pairs: pmcN and pmdN. The pmc register acts
 * as the event selector; the pmd register is the counter itself. */
#define perf_reg(c) ((c) + 4)
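/* counter c thus maps to pmc(c+4)/pmd(c+4): counter 0 uses pmc4/pmd4,
 * counter 3 uses pmc7/pmd7 */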

#define IA64_1_PMD_MASK_VAL ((1UL << 32) - 1)
#define IA64_2_PMD_MASK_VAL ((1UL << 47) - 1)
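/* The masks reflect the implemented counter widths: the generic pmd
 * counters are 32 bits wide on Itanium and 47 bits wide on Itanium 2. */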

/* The appropriate value is selected in pmu_init() */
unsigned long pmd_mask = IA64_2_PMD_MASK_VAL;

#define pmd_overflowed(r, c) ((r) & (1 << perf_reg(c)))
#define set_pmd_neg(v, c) do { \
	ia64_set_pmd(perf_reg(c), -(ulong)(v) & pmd_mask); \
	ia64_srlz_d(); } while (0)
#define set_pmd(v, c) do { \
	ia64_set_pmd(perf_reg(c), (v) & pmd_mask); \
	ia64_srlz_d(); } while (0)
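/* set_pmd_neg() arms a counter with the two's complement of the event count,
 * so the pmd overflows after exactly that many events; e.g. a count of
 * 100000 on Itanium 2 loads the pmd with (2^47 - 100000). */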
#define set_pmc(v, c) do { ia64_set_pmc(perf_reg(c), (v)); ia64_srlz_d(); } while (0)
#define get_pmd(c) ia64_get_pmd(perf_reg(c))
#define get_pmc(c) ia64_get_pmc(perf_reg(c))

/* ---------------- IRQ handler ------------------ */

/* The args match those of pfm_overflow_handler in perfmon.c.
 * The task_struct is currently filled in with the perfmon "owner" of
 * the PMU. This might change. I'm not sure it makes sense in perfmon
 * either with system-wide profiling.
 * pmc0 is a bit mask of the overflowed counters (bits 4-7); the caller
 * clears pmc0 afterwards to unfreeze the PMU and resume counting.
 */
static inline void
op_do_pmu_interrupt(u64 pmc0, struct pt_regs * regs)
{
	uint cpu = op_cpu_id();
	int ctr;

	for (ctr = 0 ; ctr < op_nr_counters ; ++ctr) {
		if (pmd_overflowed(pmc0, ctr)) {
			op_do_profile(cpu, regs->cr_iip, 1, ctr);
			set_pmd_neg(oprof_data[cpu].ctr_count[ctr], ctr);
		}
	}
}


static void
op_raw_pmu_interrupt(int irq, void * arg, struct pt_regs * regs)
{
	u64 pmc0;

	pmc0 = ia64_get_pmc(0);

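	/* bit 0 of pmc0 is the freeze bit, so mask it off; bits 4-7 flag
	 * which of pmd4-pmd7 overflowed. Writing 0 back to pmc0 below
	 * clears those bits and unfreezes the PMU. */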
	if ((pmc0 & ~0x1UL) != 0UL) {
		op_do_pmu_interrupt(pmc0, regs);
		ia64_set_pmc(0, 0);
		ia64_srlz_d();
	}
}


#define MY_OPROFILE_VECTOR (IA64_PERFMON_VECTOR - 2)
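/* (A vector close to, but distinct from, IA64_PERFMON_VECTOR - presumably
 * so oprofile's handler does not collide with perfmon's own.) */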

static void
op_set_pmv(void * dummy)
{
	ia64_set_pmv(MY_OPROFILE_VECTOR);
	ia64_srlz_d();
}


static void
op_restore_pmv(void * dummy)
{
	ia64_set_pmv(IA64_PERFMON_VECTOR);
	ia64_srlz_d();
}


static int
install_handler(void)
{
	int err = 0;

	/* Try it legally - there is some confusion about vectors vs. irqs */
	err = request_irq(MY_OPROFILE_VECTOR, op_raw_pmu_interrupt,
			SA_INTERRUPT | SA_PERCPU_IRQ, "oprofile", NULL);

	if (err) {
		printk(KERN_ALERT "oprofile_IA64: request_irq fails, "
				"returns %d\n", err);
		return err;
	}

	if ((smp_call_function(op_set_pmv, NULL, 0, 1))) {
		printk(KERN_ALERT "oprofile_IA64: unexpected failure "
				"of smp_call_function(op_set_pmv)\n");
	}

	op_set_pmv(NULL);

	return err;
}


static int
restore_handler(void)
{
	int err = 0;

	if ((smp_call_function(op_restore_pmv, NULL, 0, 1))) {
		printk(KERN_ALERT "oprofile_IA64: unexpected failure "
				"of smp_call_function(op_restore_pmv)\n");
	}

	op_restore_pmv(NULL);

	free_irq(MY_OPROFILE_VECTOR, NULL);
	return err;
}


/* ---------------- PMU setup ------------------ */

/* This is kind of artificial. The proc interface might really want to
 * accept register values directly. There are other features not exposed
 * by this limited interface; of course, exposing them might require all
 * sorts of validity checking. */
static void
pmc_fill_in(ulong * val, u8 kernel, u8 user, u8 event, u8 um)
{
	/* enable interrupt generation */
	*val |= (1 << 5);

	/* set up as a privileged monitor */
	*val |= (1 << 6);

	/* McKinley requires pmc4 to have bit 23 set (enable PMU).
	 * It is supposedly ignored in other pmc registers.
	 * Try assuming it's ignored on Itanium, too, and just
	 * set it for everyone.
	 */
	*val |= (1 << 23);

	/* enable/disable the chosen USR and OS counting */
	if (user)
		*val |= (1 << 3);
	else
		*val &= ~(1 << 3);

	if (kernel)
		*val |= (1 << 0);
	else
		*val &= ~(1 << 0);

	/* what are we counting? */
	*val &= ~(0xff << 8);
	*val |= ((event & 0xff) << 8);
	*val &= ~(0xf << 16);
	*val |= ((um & 0xf) << 16);
}
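
/* For illustration (the event number is chosen arbitrarily): counting
 * event 0x12, unit mask 0, in both kernel and user mode yields
 *   (1 << 0) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 23) | (0x12 << 8)
 * i.e. a pmc value of 0x801269. */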


static void
pmu_setup(void * dummy)
{
	ulong pmc_val;
	int ii;

	/* set up each counter */
	for (ii = 0 ; ii < op_nr_counters ; ++ii) {
		if (sysctl.ctr[ii].enabled) {
			pmc_val = 0;

			set_pmd_neg(sysctl.ctr[ii].count, ii);
			pmc_fill_in(&pmc_val, sysctl.ctr[ii].kernel,
				sysctl.ctr[ii].user, sysctl.ctr[ii].event,
				sysctl.ctr[ii].unit_mask);

			set_pmc(pmc_val, ii);
		}
	}
}


void
disable_psr(void * dummy)
{
	struct pt_regs * regs;

	/* disable profiling for my saved state */
	regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET);
	regs--;
	ia64_psr(regs)->pp = 0;
	/* shouldn't need to, but clear psr.up as well */
	ia64_psr(regs)->up = 0;

	/* disable profiling for my current state */
	__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");

#if defined(CONFIG_PERFMON) && defined(CONFIG_SMP)
#if V_AT_LEAST(2, 4, 21)
	local_cpu_data->pfm_syst_info |= PFM_CPUINFO_SYST_WIDE;
	local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_DCR_PP;
	/* FIXME: what to do with the third flag, PFM_CPUINFO_EXCL_IDLE (0x4)? */
#else
	/* disable profiling for everyone else */
	local_cpu_data->pfm_syst_wide = 1;
	local_cpu_data->pfm_dcr_pp = 0;
#endif
#endif
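	/* write 0 to pmc0 to clear the freeze bit and any pending
	 * overflow bits */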
	ia64_set_pmc(0, 0);
	ia64_srlz_d();
}


static int
pmu_setup_all(void)
{
	/* This would be a great place to reserve all cpus with
	 * some sort of call to perfmonctl (something like the
	 * CREATE_CONTEXT command). The current interface to
	 * perfmonctl wants to be called from a different task id
	 * for each CPU to be set up (and doesn't allow calls from
	 * modules).
	 */

	/* disable profiling with the psr.pp bit */
	if ((smp_call_function(disable_psr, NULL, 0, 1)))
		return -EFAULT;

	disable_psr(NULL);

	/* now I've reserved the PMUs and they should be quiet */

	if ((smp_call_function(pmu_setup, NULL, 0, 1)))
		return -EFAULT;

	pmu_setup(NULL);
	return 0;
}


#ifndef CONFIG_SMP
/* from linux/arch/ia64/kernel/perfmon.c */
/*
 * Originally written by Ganesh Venkitachalam, IBM Corp.
 * Copyright (C) 1999 Ganesh Venkitachalam <venkitac@us.ibm.com>
 *
 * Modifications by Stephane Eranian, Hewlett-Packard Co.
 * Modifications by David Mosberger-Tang, Hewlett-Packard Co.
 *
 * Copyright (C) 1999-2002 Hewlett Packard Co
 *               Stephane Eranian <eranian@hpl.hp.com>
 *               David Mosberger-Tang <davidm@hpl.hp.com>
 */

/*
 * On UP kernels, we do not need to constantly set the psr.pp bit
 * when a task is scheduled. The psr.pp bit can only be changed in
 * the kernel because of a user request. Given we are on a UP non-preemptive
 * kernel, we know that no other task is running, so we can simply update
 * each task's psr.pp in its saved state. There is thus no impact on the
 * context switch code compared to the SMP case.
 */
static void
op_tasklist_toggle_pp(unsigned int val)
{
	struct task_struct * p;
	struct pt_regs * regs;

	read_lock(&tasklist_lock);

	for_each_task(p) {
		regs = (struct pt_regs *)((unsigned long) p + IA64_STK_OFFSET);

		/*
		 * position on the pt_regs saved on the stack on first
		 * entry into the kernel
		 */
		regs--;

		/*
		 * update psr.pp
		 */
		ia64_psr(regs)->pp = val;
	}
	read_unlock(&tasklist_lock);
}
#endif


static void
pmu_start(void * info)
{
	struct pt_regs * regs;

	if (info && (*((uint *)info) != op_cpu_id()))
		return;

	/* printk(KERN_ALERT "oprofile_IA64: pmu_start on cpu %d\n",
	   op_cpu_id()); */

	/* The default control register pp value is copied into psr.pp
	 * on an interrupt. This allows interrupt service routines to
	 * be monitored.
	 */
	ia64_set_dcr(ia64_get_dcr() | IA64_DCR_PP);

#ifdef CONFIG_PERFMON
#ifdef CONFIG_SMP
#if V_AT_LEAST(2, 4, 21)
	local_cpu_data->pfm_syst_info |= PFM_CPUINFO_SYST_WIDE;
	local_cpu_data->pfm_syst_info |= PFM_CPUINFO_DCR_PP;
	/* FIXME: what to do with the third flag, PFM_CPUINFO_EXCL_IDLE (0x4)? */
#else
	local_cpu_data->pfm_syst_wide = 1;
	local_cpu_data->pfm_dcr_pp = 1;
#endif
#else
	op_tasklist_toggle_pp(1);
#endif
#endif
	/* set it in my saved state */
	regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET);
	regs--;
	ia64_psr(regs)->pp = 1;

	/* set it in my current state */
	__asm__ __volatile__ ("ssm psr.pp;;"::: "memory");
	ia64_srlz_d();
}


static void
pmu_stop(void * info)
{
	struct pt_regs * regs;

	if (info && (*((uint *)info) != op_cpu_id()))
		return;

	/* stop in my current state */
	__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");

	/* disable the dcr pp bit */
	ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP);

#ifdef CONFIG_PERFMON
#ifdef CONFIG_SMP
#if V_AT_LEAST(2, 4, 21)
	local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_SYST_WIDE;
	local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_DCR_PP;
	/* FIXME: what to do with the third flag, PFM_CPUINFO_EXCL_IDLE (0x4)? */
#else
	local_cpu_data->pfm_syst_wide = 0;
	local_cpu_data->pfm_dcr_pp = 0;
#endif
#else
	op_tasklist_toggle_pp(0);
#endif
#endif

	/* disable in my saved state */
	regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET);
	regs--;
	ia64_psr(regs)->pp = 0;
}


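/* smp_call_function() runs the handler on every other CPU, so the
 * single-CPU start/stop paths below broadcast pmu_start/pmu_stop and rely
 * on the handlers' info check to make non-target CPUs return early. */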
static void
pmu_select_start(uint cpu)
{
	if (cpu == op_cpu_id())
		pmu_start(NULL);
	else
		smp_call_function(pmu_start, &cpu, 0, 1);
}


static void
pmu_select_stop(uint cpu)
{
	if (cpu == op_cpu_id())
		pmu_stop(NULL);
	else
		smp_call_function(pmu_stop, &cpu, 0, 1);
}


static void
pmu_start_all(void)
{
	int cpu, i;

	for (cpu = 0; cpu < smp_num_cpus; cpu++) {
		struct _oprof_data * data = &oprof_data[cpu];

		for (i = 0 ; i < op_nr_counters ; ++i) {
			if (sysctl.ctr[i].enabled) {
				data->ctr_count[i] = sysctl.ctr[i].count;
			} else {
				data->ctr_count[i] = 0;
			}
		}
	}

	if (!install_handler()) {
		smp_call_function(pmu_start, NULL, 0, 1);
		pmu_start(NULL);
	}
	/* FIXME: need some way to fail here */
}


static void
pmu_stop_all(void)
{
	smp_call_function(pmu_stop, NULL, 0, 1);
	pmu_stop(NULL);
	restore_handler();
}


static int
pmu_check_params(void)
{
	int i;
	int enabled = 0;

	for (i = 0; i < op_nr_counters ; i++) {
		if (!sysctl.ctr[i].enabled)
			continue;

		enabled = 1;

		if (!sysctl.ctr[i].user && !sysctl.ctr[i].kernel) {
			printk(KERN_ERR "oprofile: neither kernel nor user "
				"set for counter %d\n", i);
			return -EINVAL;
		}

		if (check_range(sysctl.ctr[i].count, 1, OP_MAX_PERF_COUNT,
			"ctr count value %d not in range (%d %ld)\n"))
			return -EINVAL;
	}

	if (!enabled) {
		printk(KERN_ERR "oprofile: no counters have been enabled.\n");
		return -EINVAL;
	}

	return 0;
}


static struct op_msrs cpu_msrs[NR_CPUS];


static void free_msr_group(struct op_msr_group * group)
{
	if (group->addrs)
		kfree(group->addrs);
	if (group->saved)
		kfree(group->saved);
	group->addrs = NULL;
	group->saved = NULL;
}


static void pmu_save_registers(void * dummy)
{
	uint i;
	uint const cpu = op_cpu_id();
	struct op_msr_group * counters = &cpu_msrs[cpu].counters;
	struct op_msr_group * controls = &cpu_msrs[cpu].controls;

	counters->addrs = NULL;
	counters->saved = NULL;
	controls->addrs = NULL;
	controls->saved = NULL;

	counters->saved = kmalloc(
		op_nr_counters * sizeof(struct op_saved_msr), GFP_KERNEL);
	if (!counters->saved)
		goto fault;

	controls->saved = kmalloc(
		op_nr_counters * sizeof(struct op_saved_msr), GFP_KERNEL);
	if (!controls->saved)
		goto fault;

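	/* only the .low slot of each op_saved_msr is used here; this assumes
	 * the op_saved_msr type (see op_ia64_model.h) is wide enough to hold
	 * a full pmc/pmd value */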
	for (i = 0; i < op_nr_counters; ++i) {
		controls->saved[i].low = get_pmc(i);
		counters->saved[i].low = get_pmd(i);
	}
	return;

fault:
	free_msr_group(counters);
	free_msr_group(controls);
}


static void pmu_restore_registers(void * dummy)
{
	uint i;
	uint const cpu = op_cpu_id();
	struct op_msr_group * counters = &cpu_msrs[cpu].counters;
	struct op_msr_group * controls = &cpu_msrs[cpu].controls;

	for (i = 0; i < op_nr_counters; ++i) {
		set_pmc(controls->saved[i].low, i);
		set_pmd(counters->saved[i].low, i);
	}

	free_msr_group(counters);
	free_msr_group(controls);
}


static int
pmu_init(void)
{
	int err = 0;

	/* figure out the processor type and from it the number of
	   implemented pmd bits and the number of counters */
	switch (get_cpu_type()) {
	case CPU_IA64_1:
		pmd_mask = IA64_1_PMD_MASK_VAL; break;
	case CPU_IA64_2:
	case CPU_IA64:
		pmd_mask = IA64_2_PMD_MASK_VAL; break;
	default:
		err = -EIO; break;
	}

	if (err)
		goto out;

	op_nr_counters = 4;

	if ((err = smp_call_function(pmu_save_registers, NULL, 0, 1)))
		goto out;

	pmu_save_registers(NULL);

out:
	return err;
}


static void
pmu_deinit(void)
{
	smp_call_function(pmu_restore_registers, NULL, 0, 1);
	pmu_restore_registers(NULL);
}


static char * names[] = { "0", "1", "2", "3", };


static int
pmu_add_sysctls(ctl_table * next)
{
	ctl_table * start = next;
	ctl_table * tab;
	int i, j;

	for (i = 0; i < op_nr_counters; i++) {
		next->ctl_name = 1;
		next->procname = names[i];
		next->mode = 0700;

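		/* seven slots: six counter parameters plus a zeroed
		 * sentinel entry to terminate the table */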
		if (!(tab = kmalloc(sizeof(ctl_table) * 7, GFP_KERNEL)))
			goto cleanup;

		next->child = tab;

		memset(tab, 0, sizeof(ctl_table) * 7);
		tab[0] = ((ctl_table) { 1, "enabled", &sysctl_parms.ctr[i].enabled, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[1] = ((ctl_table) { 1, "event", &sysctl_parms.ctr[i].event, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[2] = ((ctl_table) { 1, "count", &sysctl_parms.ctr[i].count, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[3] = ((ctl_table) { 1, "unit_mask", &sysctl_parms.ctr[i].unit_mask, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[4] = ((ctl_table) { 1, "kernel", &sysctl_parms.ctr[i].kernel, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[5] = ((ctl_table) { 1, "user", &sysctl_parms.ctr[i].user, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		next++;
	}

	return 0;

cleanup:
	next = start;
	for (j = 0; j < i; j++) {
		kfree(next->child);
		next++;
	}
	return -EFAULT;
}


static void pmu_remove_sysctls(ctl_table * next)
{
	int ii;

	for (ii = 0; ii < op_nr_counters; ii++) {
		kfree(next->child);
		next++;
	}
}


struct op_int_operations op_nmi_ops = {
	init: pmu_init,
	deinit: pmu_deinit,
	add_sysctls: pmu_add_sysctls,
	remove_sysctls: pmu_remove_sysctls,
	check_params: pmu_check_params,
	setup: pmu_setup_all,
	start: pmu_start_all,
	stop: pmu_stop_all,
	start_cpu: pmu_select_start,
	stop_cpu: pmu_select_stop,
};


struct op_int_operations const * op_int_interface(void)
{
	return &op_nmi_ops;
}

/* Need this dummy so module/oprofile.c links */
struct op_int_operations op_rtc_ops = {
	init: NULL,
	deinit: NULL,
	add_sysctls: NULL,
	remove_sysctls: NULL,
	check_params: NULL,
	setup: NULL,
	start: NULL,
	stop: NULL,
	start_cpu: NULL,
	stop_cpu: NULL,
};