/**
 * @file opd_perfmon.c
 * perfmonctl() handling
 *
 * @remark Copyright 2003 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 */

#ifdef __ia64__

/* need this for sched_setaffinity() in <sched.h> */
#define _GNU_SOURCE

#include "oprofiled.h"
#include "opd_perfmon.h"
#include "opd_events.h"

#include "op_cpu_type.h"
#include "op_libiberty.h"
#include "op_hw_config.h"

#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#ifdef HAVE_SCHED_SETAFFINITY
#include <sched.h>
#endif

extern op_cpu cpu_type;

#ifndef HAVE_SCHED_SETAFFINITY

/* many glibc's are not yet up to date */
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 1231
#endif

/* Copied from glibc's <sched.h> and <bits/sched.h> and munged */
#define CPU_SETSIZE 1024
#define __NCPUBITS (8 * sizeof (unsigned long))
typedef struct
{
        unsigned long __bits[CPU_SETSIZE / __NCPUBITS];
} cpu_set_t;

#define CPU_SET(cpu, cpusetp) \
        ((cpusetp)->__bits[(cpu)/__NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS)))
#define CPU_ZERO(cpusetp) \
        memset((cpusetp), 0, sizeof(cpu_set_t))

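/* fall back to a raw syscall when glibc does not provide sched_setaffinity() */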
static int
sched_setaffinity(pid_t pid, size_t len, cpu_set_t const * cpusetp)
{
        return syscall(__NR_sched_setaffinity, pid, len, cpusetp);
}
#endif


#ifndef HAVE_PERFMONCTL
#ifndef __NR_perfmonctl
#define __NR_perfmonctl 1175
#endif

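/* likewise, invoke perfmonctl() directly via syscall() when glibc lacks a wrapper */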
static int perfmonctl(int fd, int cmd, void * arg, int narg)
{
        return syscall(__NR_perfmonctl, fd, cmd, arg, narg);
}
#endif


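/* 16-byte buffer copied into ctx_smpl_buf_id (the sampling buffer UUID)
 * when each perfmon context is created below
 */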
static unsigned char uuid[16] = {
        0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69,
        0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c
};


static size_t nr_cpus;

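/* per-CPU child process state: its pid, the pipe used to tell the parent
 * that the context is loaded, the perfmon context fd, and flags set by the
 * signal handlers below
 */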
struct child {
        pid_t pid;
        int up_pipe[2];
        int ctx_fd;
        sig_atomic_t sigusr1;
        sig_atomic_t sigusr2;
        sig_atomic_t sigterm;
};

static struct child * children;

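/** issue PFM_START on the given perfmon context; exits on failure */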
static void perfmon_start_child(int ctx_fd)
{
        if (perfmonctl(ctx_fd, PFM_START, 0, 0) == -1) {
                perror("Couldn't start perfmon: ");
                exit(EXIT_FAILURE);
        }
}


static void perfmon_stop_child(int ctx_fd)
{
        if (perfmonctl(ctx_fd, PFM_STOP, 0, 0) == -1) {
                perror("Couldn't stop perfmon: ");
                exit(EXIT_FAILURE);
        }
}


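/* SIGUSR1 (start) and SIGUSR2 (stop) handlers: each only records the request
 * for the child whose pid matches; the actual PFM_START/PFM_STOP is issued
 * from run_child()
 */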
static void child_sigusr1(int val __attribute__((unused)))
{
        size_t i;

        for (i = 0; i < nr_cpus; ++i) {
                if (children[i].pid == getpid()) {
                        children[i].sigusr1 = 1;
                        return;
                }
        }
}


static void child_sigusr2(int val __attribute__((unused)))
{
        size_t i;

        for (i = 0; i < nr_cpus; ++i) {
                if (children[i].pid == getpid()) {
                        children[i].sigusr2 = 1;
                        return;
                }
        }
}


static void child_sigterm(int val __attribute__((unused)))
{
        printf("Child received SIGTERM, killing parent.\n");
        kill(getppid(), SIGTERM);
}


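/** pin the calling child to the given CPU so it programs that CPU's PMU */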
static void set_affinity(size_t cpu)
{
        cpu_set_t set;

        CPU_ZERO(&set);
        CPU_SET(cpu, &set);

        int err = sched_setaffinity(getpid(), sizeof(set), &set);

        if (err == -1) {
                fprintf(stderr, "Failed to set affinity: %s\n",
                        strerror(errno));
                exit(EXIT_FAILURE);
        }
}


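/* block SIGUSR1/SIGUSR2 and install the handlers above; those two signals are
 * only delivered while the child sits in sigsuspend() in run_child()
 */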
static void setup_signals(void)
{
        struct sigaction act;
        sigset_t mask;

        sigemptyset(&mask);
        sigaddset(&mask, SIGUSR1);
        sigaddset(&mask, SIGUSR2);
        sigprocmask(SIG_BLOCK, &mask, NULL);

        act.sa_handler = child_sigusr1;
        act.sa_flags = 0;
        sigemptyset(&act.sa_mask);

        if (sigaction(SIGUSR1, &act, NULL)) {
                perror("oprofiled: install of SIGUSR1 handler failed: ");
                exit(EXIT_FAILURE);
        }

        act.sa_handler = child_sigusr2;
        act.sa_flags = 0;
        sigemptyset(&act.sa_mask);

        if (sigaction(SIGUSR2, &act, NULL)) {
                perror("oprofiled: install of SIGUSR2 handler failed: ");
                exit(EXIT_FAILURE);
        }

        act.sa_handler = child_sigterm;
        act.sa_flags = 0;
        sigemptyset(&act.sa_mask);

        if (sigaction(SIGTERM, &act, NULL)) {
                perror("oprofiled: install of SIGTERM handler failed: ");
                exit(EXIT_FAILURE);
        }
}


/** create the per-cpu context */
static void create_context(struct child * self)
{
        pfarg_context_t ctx;
        int err;

        memset(&ctx, 0, sizeof(pfarg_context_t));
        memcpy(&ctx.ctx_smpl_buf_id, &uuid, 16);
        ctx.ctx_flags = PFM_FL_SYSTEM_WIDE;

        err = perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1);
        if (err == -1) {
                fprintf(stderr, "CREATE_CONTEXT failed: %s\n",
                        strerror(errno));
                exit(EXIT_FAILURE);
        }

        self->ctx_fd = ctx.ctx_fd;
}


/** program the perfmon counters */
static void write_pmu(struct child * self)
{
        pfarg_reg_t pc[OP_MAX_COUNTERS];
        pfarg_reg_t pd[OP_MAX_COUNTERS];
        int err;
        size_t i;

        memset(pc, 0, sizeof(pc));
        memset(pd, 0, sizeof(pd));

#define PMC_GEN_INTERRUPT (1UL << 5)
#define PMC_PRIV_MONITOR (1UL << 6)
/* McKinley requires pmc4 to have bit 23 set (enable PMU).
 * It is supposedly ignored in other pmc registers.
 */
#define PMC_MANDATORY (1UL << 23)
#define PMC_USER (1UL << 3)
#define PMC_KERNEL (1UL << 0)
        for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
                struct opd_event * event = &opd_events[i];
                pc[i].reg_num = event->counter + 4;
                pc[i].reg_value = PMC_GEN_INTERRUPT;
                pc[i].reg_value |= PMC_PRIV_MONITOR;
                pc[i].reg_value |= PMC_MANDATORY;
                (event->user) ? (pc[i].reg_value |= PMC_USER)
                              : (pc[i].reg_value &= ~PMC_USER);
                (event->kernel) ? (pc[i].reg_value |= PMC_KERNEL)
                                : (pc[i].reg_value &= ~PMC_KERNEL);
                pc[i].reg_value &= ~(0xff << 8);
                pc[i].reg_value |= ((event->value & 0xff) << 8);
                pc[i].reg_value &= ~(0xf << 16);
                pc[i].reg_value |= ((event->um & 0xf) << 16);
                pc[i].reg_smpl_eventid = event->counter;
        }

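        /* program the data registers: an unsigned counter initialised to
         * (~0UL - count + 1), i.e. -count, overflows (and so interrupts)
         * after 'count' events
         */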
        for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
                struct opd_event * event = &opd_events[i];
                pd[i].reg_value = ~0UL - event->count + 1;
                pd[i].reg_short_reset = ~0UL - event->count + 1;
                pd[i].reg_num = event->counter + 4;
        }

        err = perfmonctl(self->ctx_fd, PFM_WRITE_PMCS, pc, i);
        if (err == -1) {
                perror("Couldn't write PMCs: ");
                exit(EXIT_FAILURE);
        }

        err = perfmonctl(self->ctx_fd, PFM_WRITE_PMDS, pd, i);
        if (err == -1) {
                perror("Couldn't write PMDs: ");
                exit(EXIT_FAILURE);
        }
}


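/** attach the freshly programmed context to this (CPU-bound) process */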
static void load_context(struct child * self)
{
        pfarg_load_t load_args;
        int err;

        memset(&load_args, 0, sizeof(load_args));
        load_args.load_pid = self->pid;

        err = perfmonctl(self->ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1);
        if (err == -1) {
                perror("Couldn't load context: ");
                exit(EXIT_FAILURE);
        }
}


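/** tell the parent, over the pipe, which CPU's child is up and running */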
static void notify_parent(struct child * self, size_t cpu)
{
        for (;;) {
                ssize_t ret;
                ret = write(self->up_pipe[1], &cpu, sizeof(size_t));
                if (ret == sizeof(size_t))
                        break;
                if (ret < 0 && errno != EINTR) {
                        fprintf(stderr, "Failed to write child pipe with %s\n",
                                strerror(errno));
                        exit(EXIT_FAILURE);
                }
        }
}


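/** set up and run one per-CPU child: bind to the CPU, create and program a
 * system-wide perfmon context, then loop waiting for start/stop requests
 */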
static void run_child(size_t cpu)
{
        struct child * self = &children[cpu];

        self->pid = getpid();
        self->sigusr1 = 0;
        self->sigusr2 = 0;
        self->sigterm = 0;

        setup_signals();

        set_affinity(cpu);

        create_context(self);

        write_pmu(self);

        load_context(self);

        notify_parent(self, cpu);

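        /* wait for control signals: everything except SIGUSR1/SIGUSR2/SIGTERM
         * stays blocked during sigsuspend(), and any pending start/stop
         * request is handled before going back to sleep
         */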
        for (;;) {
                sigset_t sigmask;
                sigfillset(&sigmask);
                sigdelset(&sigmask, SIGUSR1);
                sigdelset(&sigmask, SIGUSR2);
                sigdelset(&sigmask, SIGTERM);

                if (self->sigusr1) {
                        printf("PFM_START on CPU%d\n", (int)cpu);
                        fflush(stdout);
                        perfmon_start_child(self->ctx_fd);
                        self->sigusr1 = 0;
                }

                if (self->sigusr2) {
                        printf("PFM_STOP on CPU%d\n", (int)cpu);
                        fflush(stdout);
                        perfmon_stop_child(self->ctx_fd);
                        self->sigusr2 = 0;
                }

                sigsuspend(&sigmask);
        }
}


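/** parent side: block until the child reports its CPU number over the pipe */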
static void wait_for_child(struct child * child)
{
        size_t tmp;
        for (;;) {
                ssize_t ret;
                ret = read(child->up_pipe[0], &tmp, sizeof(size_t));
                if (ret == sizeof(size_t))
                        break;
                if (ret < 0 && errno != EINTR) {
                        fprintf(stderr, "Failed to read child pipe with %s\n",
                                strerror(errno));
                        exit(EXIT_FAILURE);
                }
        }
        printf("Perfmon child up on CPU%d\n", (int)tmp);
        fflush(stdout);

        close(child->up_pipe[0]);
        close(child->up_pipe[1]);
}

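/* single context used when profiling a Xen system (!no_xen): no per-CPU
 * children are forked, the daemon itself creates and loads one context
 */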
static struct child* xen_ctx;

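/**
 * fork one perfmon child per online CPU (or set up the single Xen context)
 * and wait until every child reports that its context is loaded.
 *
 * Expected call order from the daemon (a sketch, based on this file alone):
 *
 *   perfmon_init();
 *   perfmon_start();
 *   ... profiling ...
 *   perfmon_stop();
 *   perfmon_exit();
 */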
void perfmon_init(void)
{
        size_t i;
        long nr;

        if (cpu_type == CPU_TIMER_INT)
                return;

        if (!no_xen) {
                xen_ctx = xmalloc(sizeof(struct child));
                xen_ctx->pid = getpid();
                xen_ctx->up_pipe[0] = -1;
                xen_ctx->up_pipe[1] = -1;
                xen_ctx->sigusr1 = 0;
                xen_ctx->sigusr2 = 0;
                xen_ctx->sigterm = 0;

                create_context(xen_ctx);

                write_pmu(xen_ctx);

                load_context(xen_ctx);
                return;
        }

        nr = sysconf(_SC_NPROCESSORS_ONLN);
        if (nr == -1) {
                fprintf(stderr, "Couldn't determine number of CPUs.\n");
                exit(EXIT_FAILURE);
        }

        nr_cpus = nr;

        children = xmalloc(sizeof(struct child) * nr_cpus);

        for (i = 0; i < nr_cpus; ++i) {
                int ret;

                if (pipe(children[i].up_pipe)) {
                        perror("Couldn't create child pipe");
                        exit(EXIT_FAILURE);
                }

                ret = fork();
                if (ret == -1) {
                        fprintf(stderr, "Couldn't fork perfmon child.\n");
                        exit(EXIT_FAILURE);
                } else if (ret == 0) {
                        printf("Running perfmon child on CPU%d.\n", (int)i);
                        fflush(stdout);
                        run_child(i);
                } else {
                        children[i].pid = ret;
                        printf("Waiting on CPU%d\n", (int)i);
                        wait_for_child(&children[i]);
                }
        }
}


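/** kill and reap all perfmon children; nothing to do in Xen or timer mode */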
void perfmon_exit(void)
{
        size_t i;

        if (cpu_type == CPU_TIMER_INT)
                return;

        if (!no_xen)
                return;

        for (i = 0; i < nr_cpus; ++i) {
                kill(children[i].pid, SIGKILL);
                waitpid(children[i].pid, NULL, 0);
        }
}


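/** start counting: directly on the Xen context, or via SIGUSR1 to each child */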
void perfmon_start(void)
{
        size_t i;

        if (cpu_type == CPU_TIMER_INT)
                return;

        if (!no_xen) {
                perfmon_start_child(xen_ctx->ctx_fd);
                return;
        }

        for (i = 0; i < nr_cpus; ++i)
                kill(children[i].pid, SIGUSR1);
}


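/** stop counting: directly on the Xen context, or via SIGUSR2 to each child */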
void perfmon_stop(void)
{
        size_t i;

        if (cpu_type == CPU_TIMER_INT)
                return;

        if (!no_xen) {
                perfmon_stop_child(xen_ctx->ctx_fd);
                return;
        }

        for (i = 0; i < nr_cpus; ++i)
                kill(children[i].pid, SIGUSR2);
}

#endif /* __ia64__ */