1 /**
2 * @file opd_perfmon.c
3 * perfmonctl() handling
4 *
5 * @remark Copyright 2003 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author John Levon
9 */
10
11 #ifdef __ia64__
12
13 /* need this for sched_setaffinity() in <sched.h> */
14 #define _GNU_SOURCE
15
16 #include "oprofiled.h"
17 #include "opd_perfmon.h"
18 #include "opd_events.h"
19
20 #include "op_cpu_type.h"
21 #include "op_libiberty.h"
22 #include "op_hw_config.h"
23
24 #include <sys/syscall.h>
25 #include <sys/wait.h>
26 #include <unistd.h>
27 #include <limits.h>
28 #include <signal.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <errno.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #ifdef HAVE_SCHED_SETAFFINITY
36 #include <sched.h>
37 #endif
38
39 extern op_cpu cpu_type;
40
41 #ifndef HAVE_SCHED_SETAFFINITY
42
/* many glibc's are not yet up to date */
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 1231
#endif

/* Copied from glibc's <sched.h> and <bits/sched.h> and munged */
#define CPU_SETSIZE 1024
#define __NCPUBITS (8 * sizeof (unsigned long))
typedef struct
{
	unsigned long __bits[CPU_SETSIZE / __NCPUBITS];
} cpu_set_t;

#define CPU_SET(cpu, cpusetp) \
	((cpusetp)->__bits[(cpu)/__NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS)))
#define CPU_ZERO(cpusetp) \
	memset((cpusetp), 0, sizeof(cpu_set_t))

/* raw-syscall fallback for glibc's lacking a sched_setaffinity() wrapper;
 * 1231 is the ia64 syscall number (this file is __ia64__-only) */
static int
sched_setaffinity(pid_t pid, size_t len, cpu_set_t const * cpusetp)
{
	return syscall(__NR_sched_setaffinity, pid, len, cpusetp);
}
66 #endif
67
68
69 #ifndef HAVE_PERFMONCTL
#ifndef __NR_perfmonctl
#define __NR_perfmonctl 1175
#endif

/* raw-syscall fallback for libc's lacking a perfmonctl() wrapper;
 * 1175 is the ia64 syscall number */
static int perfmonctl(int fd, int cmd, void * arg, int narg)
{
	return syscall(__NR_perfmonctl, fd, cmd, arg, narg);
}
78 #endif
79
80
/* 16-byte sampling-buffer format id copied into ctx_smpl_buf_id by
 * create_context(); the bytes are ASCII text, not a real UUID */
static unsigned char uuid[16] = {
	0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69,
	0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c
};
85
86
/* number of online CPUs; one perfmon child is forked per CPU */
static size_t nr_cpus;

/* per-CPU perfmon child process state */
struct child {
	pid_t pid;		/* child's pid (set by the child itself) */
	int up_pipe[2];		/* pipe the child signals readiness on */
	int ctx_fd;		/* perfmon context fd for this CPU */
	/* These flags are written from async signal handlers
	 * (child_sigusr1/child_sigusr2) and polled in run_child();
	 * volatile sig_atomic_t is the only type the C standard
	 * guarantees for signal-handler communication. */
	volatile sig_atomic_t sigusr1;
	volatile sig_atomic_t sigusr2;
	volatile sig_atomic_t sigterm;
};

/* array of nr_cpus children, allocated in perfmon_init() */
static struct child * children;
99
perfmon_start_child(int ctx_fd)100 static void perfmon_start_child(int ctx_fd)
101 {
102 if (perfmonctl(ctx_fd, PFM_START, 0, 0) == -1) {
103 exit(EXIT_FAILURE);
104 }
105 }
106
107
perfmon_stop_child(int ctx_fd)108 static void perfmon_stop_child(int ctx_fd)
109 {
110 if (perfmonctl(ctx_fd, PFM_STOP, 0, 0) == -1) {
111 exit(EXIT_FAILURE);
112 }
113 }
114
115
child_sigusr1(int val)116 static void child_sigusr1(int val __attribute__((unused)))
117 {
118 size_t i;
119
120 for (i = 0; i < nr_cpus; ++i) {
121 if (children[i].pid == getpid()) {
122 children[i].sigusr1 = 1;
123 return;
124 }
125 }
126 }
127
128
child_sigusr2(int val)129 static void child_sigusr2(int val __attribute__((unused)))
130 {
131 size_t i;
132
133 for (i = 0; i < nr_cpus; ++i) {
134 if (children[i].pid == getpid()) {
135 children[i].sigusr2 = 1;
136 return;
137 }
138 }
139 }
140
141
/* SIGTERM handler: forward the termination request to the parent
 * daemon (kill() and getppid() are async-signal-safe) */
static void child_sigterm(int val __attribute__((unused)))
{
	pid_t parent = getppid();

	kill(parent, SIGTERM);
}
146
147
set_affinity(size_t cpu)148 static void set_affinity(size_t cpu)
149 {
150 cpu_set_t set;
151 int err;
152
153 CPU_ZERO(&set);
154 CPU_SET(cpu, &set);
155
156 err = sched_setaffinity(getpid(), sizeof(set), &set);
157
158 if (err == -1) {
159 perror("Failed to set affinity");
160 exit(EXIT_FAILURE);
161 }
162 }
163
164
setup_signals(void)165 static void setup_signals(void)
166 {
167 struct sigaction act;
168 sigset_t mask;
169
170 sigemptyset(&mask);
171 sigaddset(&mask, SIGUSR1);
172 sigaddset(&mask, SIGUSR2);
173 sigprocmask(SIG_BLOCK, &mask, NULL);
174
175 act.sa_handler = child_sigusr1;
176 act.sa_flags = 0;
177 sigemptyset(&act.sa_mask);
178
179 if (sigaction(SIGUSR1, &act, NULL)) {
180 perror("oprofiled: install of SIGUSR1 handler failed");
181 exit(EXIT_FAILURE);
182 }
183
184 act.sa_handler = child_sigusr2;
185 act.sa_flags = 0;
186 sigemptyset(&act.sa_mask);
187
188 if (sigaction(SIGUSR2, &act, NULL)) {
189 perror("oprofiled: install of SIGUSR2 handler failed");
190 exit(EXIT_FAILURE);
191 }
192
193 act.sa_handler = child_sigterm;
194 act.sa_flags = 0;
195 sigemptyset(&act.sa_mask);
196
197 if (sigaction(SIGTERM, &act, NULL)) {
198 perror("oprofiled: install of SIGTERM handler failed");
199 exit(EXIT_FAILURE);
200 }
201 }
202
203
204 /** create the per-cpu context */
create_context(struct child * self)205 static void create_context(struct child * self)
206 {
207 pfarg_context_t ctx;
208 int err;
209
210 memset(&ctx, 0, sizeof(pfarg_context_t));
211 memcpy(&ctx.ctx_smpl_buf_id, &uuid, 16);
212 ctx.ctx_flags = PFM_FL_SYSTEM_WIDE;
213
214 err = perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1);
215 if (err == -1) {
216 perror("CREATE_CONTEXT failed");
217 exit(EXIT_FAILURE);
218 }
219
220 self->ctx_fd = ctx.ctx_fd;
221 }
222
223
224 /** program the perfmon counters */
/** program the perfmon counters: one PMC (control register) and one
 *  PMD (data register) entry per configured event, then hand both
 *  arrays to the kernel. Exits on any perfmonctl() failure. */
static void write_pmu(struct child * self)
{
	pfarg_reg_t pc[OP_MAX_COUNTERS];
	pfarg_reg_t pd[OP_MAX_COUNTERS];
	int err;
	size_t i;

	memset(pc, 0, sizeof(pc));
	memset(pd, 0, sizeof(pd));

#define PMC_GEN_INTERRUPT (1UL << 5)
#define PMC_PRIV_MONITOR (1UL << 6)
/* McKinley requires pmc4 to have bit 23 set (enable PMU).
 * It is supposedly ignored in other pmc registers.
 */
#define PMC_MANDATORY (1UL << 23)
#define PMC_USER (1UL << 3)
#define PMC_KERNEL (1UL << 0)
	/* build the control registers; counter N maps to pmc(N+4)/pmd(N+4) */
	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
		struct opd_event * event = &opd_events[i];
		pc[i].reg_num = event->counter + 4;
		pc[i].reg_value = PMC_GEN_INTERRUPT;
		pc[i].reg_value |= PMC_PRIV_MONITOR;
		pc[i].reg_value |= PMC_MANDATORY;
		(event->user) ? (pc[i].reg_value |= PMC_USER)
			: (pc[i].reg_value &= ~PMC_USER);
		(event->kernel) ? (pc[i].reg_value |= PMC_KERNEL)
			: (pc[i].reg_value &= ~PMC_KERNEL);
		/* event select lives in bits 8-15, unit mask in bits 16-19
		 * (the int-typed masks sign-extend to the right 64-bit value) */
		pc[i].reg_value &= ~(0xff << 8);
		pc[i].reg_value |= ((event->value & 0xff) << 8);
		pc[i].reg_value &= ~(0xf << 16);
		pc[i].reg_value |= ((event->um & 0xf) << 16);
		pc[i].reg_smpl_eventid = event->counter;
	}

	/* seed each data register with -count (two's complement) so the
	 * counter overflows after event->count occurrences */
	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
		struct opd_event * event = &opd_events[i];
		pd[i].reg_value = ~0UL - event->count + 1;
		pd[i].reg_short_reset = ~0UL - event->count + 1;
		pd[i].reg_num = event->counter + 4;
	}

	/* note: after the loops, i is the number of configured events and
	 * is passed as the register count */
	err = perfmonctl(self->ctx_fd, PFM_WRITE_PMCS, pc, i);
	if (err == -1) {
		perror("Couldn't write PMCs");
		exit(EXIT_FAILURE);
	}

	err = perfmonctl(self->ctx_fd, PFM_WRITE_PMDS, pd, i);
	if (err == -1) {
		perror("Couldn't write PMDs");
		exit(EXIT_FAILURE);
	}
}
279
280
load_context(struct child * self)281 static void load_context(struct child * self)
282 {
283 pfarg_load_t load_args;
284 int err;
285
286 memset(&load_args, 0, sizeof(load_args));
287 load_args.load_pid = self->pid;
288
289 err = perfmonctl(self->ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1);
290 if (err == -1) {
291 perror("Couldn't load context");
292 exit(EXIT_FAILURE);
293 }
294 }
295
296
notify_parent(struct child * self,size_t cpu)297 static void notify_parent(struct child * self, size_t cpu)
298 {
299 for (;;) {
300 ssize_t ret;
301 ret = write(self->up_pipe[1], &cpu, sizeof(size_t));
302 if (ret == sizeof(size_t))
303 break;
304 if (ret < 0 && errno != EINTR) {
305 perror("Failed to write child pipe:");
306 exit(EXIT_FAILURE);
307 }
308 }
309 }
310
/* the child struct this process runs as; set in run_child() for the
 * atexit handler below */
static struct child * inner_child;
/* atexit handler: close the write end of the readiness pipe so the
 * parent's blocking read() does not hang if we exit before notifying */
void close_pipe(void)
{
	close(inner_child->up_pipe[1]);
}
316
/* main body of a per-CPU perfmon child: detach from the daemon's
 * filesystem state, pin to the CPU, set up the perfmon context, tell
 * the parent we're ready, then loop forever servicing start/stop
 * requests delivered via SIGUSR1/SIGUSR2. Never returns. */
static void run_child(size_t cpu)
{
	struct child * self = &children[cpu];

	self->pid = getpid();
	self->sigusr1 = 0;
	self->sigusr2 = 0;
	self->sigterm = 0;

	/* register cleanup of the readiness pipe; if registration fails,
	 * close it ourselves before bailing out */
	inner_child = self;
	if (atexit(close_pipe)){
		close_pipe();
		exit(EXIT_FAILURE);
	}

	umask(0);
	/* Change directory to allow directory to be removed */
	if (chdir("/") < 0) {
		perror("Unable to chdir to \"/\"");
		exit(EXIT_FAILURE);
	}

	setup_signals();

	set_affinity(cpu);

	create_context(self);

	write_pmu(self);

	load_context(self);

	notify_parent(self, cpu);

	/* Redirect standard files to /dev/null */
	freopen( "/dev/null", "r", stdin);
	freopen( "/dev/null", "w", stdout);
	freopen( "/dev/null", "w", stderr);

	/* SIGUSR1/SIGUSR2 were blocked in setup_signals(), so the flags
	 * can only change inside sigsuspend() below - checking them with
	 * the signals blocked avoids a lost-wakeup race */
	for (;;) {
		sigset_t sigmask;
		sigfillset(&sigmask);
		sigdelset(&sigmask, SIGUSR1);
		sigdelset(&sigmask, SIGUSR2);
		sigdelset(&sigmask, SIGTERM);

		if (self->sigusr1) {
			perfmon_start_child(self->ctx_fd);
			self->sigusr1 = 0;
		}

		if (self->sigusr2) {
			perfmon_stop_child(self->ctx_fd);
			self->sigusr2 = 0;
		}

		sigsuspend(&sigmask);
	}
}
376
377
wait_for_child(struct child * child)378 static void wait_for_child(struct child * child)
379 {
380 size_t tmp;
381 for (;;) {
382 ssize_t ret;
383 ret = read(child->up_pipe[0], &tmp, sizeof(size_t));
384 if (ret == sizeof(size_t))
385 break;
386 if ((ret < 0 && errno != EINTR) || ret == 0 ) {
387 perror("Failed to read child pipe");
388 exit(EXIT_FAILURE);
389 }
390 }
391 printf("Perfmon child up on CPU%d\n", (int)tmp);
392 fflush(stdout);
393
394 close(child->up_pipe[0]);
395 }
396
397 static struct child* xen_ctx;
398
perfmon_init(void)399 void perfmon_init(void)
400 {
401 size_t i;
402 long nr;
403
404 if (cpu_type == CPU_TIMER_INT)
405 return;
406
407 if (!no_xen) {
408 xen_ctx = xmalloc(sizeof(struct child));
409 xen_ctx->pid = getpid();
410 xen_ctx->up_pipe[0] = -1;
411 xen_ctx->up_pipe[1] = -1;
412 xen_ctx->sigusr1 = 0;
413 xen_ctx->sigusr2 = 0;
414 xen_ctx->sigterm = 0;
415
416 create_context(xen_ctx);
417
418 write_pmu(xen_ctx);
419
420 load_context(xen_ctx);
421 return;
422 }
423
424
425 nr = sysconf(_SC_NPROCESSORS_ONLN);
426 if (nr == -1) {
427 fprintf(stderr, "Couldn't determine number of CPUs.\n");
428 exit(EXIT_FAILURE);
429 }
430
431 nr_cpus = nr;
432
433 children = xmalloc(sizeof(struct child) * nr_cpus);
434 bzero(children, sizeof(struct child) * nr_cpus);
435
436 for (i = 0; i < nr_cpus; ++i) {
437 int ret;
438
439 if (pipe(children[i].up_pipe)) {
440 perror("Couldn't create child pipe");
441 exit(EXIT_FAILURE);
442 }
443
444 ret = fork();
445 if (ret == -1) {
446 perror("Couldn't fork perfmon child");
447 exit(EXIT_FAILURE);
448 } else if (ret == 0) {
449 close(children[i].up_pipe[0]);
450 run_child(i);
451 } else {
452 children[i].pid = ret;
453 close(children[i].up_pipe[1]);
454 printf("Waiting on CPU%d\n", (int)i);
455 wait_for_child(&children[i]);
456 }
457 }
458 }
459
460
perfmon_exit(void)461 void perfmon_exit(void)
462 {
463 size_t i;
464
465 if (cpu_type == CPU_TIMER_INT)
466 return;
467
468 if (!no_xen)
469 return;
470
471 for (i = 0; i < nr_cpus; ++i) {
472 if (children[i].pid) {
473 int c_pid = children[i].pid;
474 children[i].pid = 0;
475 if (kill(c_pid, SIGKILL)==0)
476 waitpid(c_pid, NULL, 0);
477 }
478 }
479 }
480
481
perfmon_start(void)482 void perfmon_start(void)
483 {
484 size_t i;
485
486 if (cpu_type == CPU_TIMER_INT)
487 return;
488
489 if (!no_xen) {
490 perfmon_start_child(xen_ctx->ctx_fd);
491 return;
492 }
493
494 for (i = 0; i < nr_cpus; ++i) {
495 if (kill(children[i].pid, SIGUSR1)) {
496 perror("Unable to start perfmon");
497 exit(EXIT_FAILURE);
498 }
499 }
500 }
501
502
perfmon_stop(void)503 void perfmon_stop(void)
504 {
505 size_t i;
506
507 if (cpu_type == CPU_TIMER_INT)
508 return;
509
510 if (!no_xen) {
511 perfmon_stop_child(xen_ctx->ctx_fd);
512 return;
513 }
514
515 for (i = 0; i < nr_cpus; ++i)
516 if (kill(children[i].pid, SIGUSR2)) {
517 perror("Unable to stop perfmon");
518 exit(EXIT_FAILURE);
519 }
520 }
521
522 #endif /* __ia64__ */
523