/**
 * @file opd_perfmon.c
 * perfmonctl() handling
 *
 * @remark Copyright 2003 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 */

#ifdef __ia64__

/* need this for sched_setaffinity() in <sched.h> */
#define _GNU_SOURCE

#include "oprofiled.h"
#include "opd_perfmon.h"
#include "opd_events.h"

#include "op_cpu_type.h"
#include "op_libiberty.h"
#include "op_hw_config.h"

#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#ifdef HAVE_SCHED_SETAFFINITY
#include <sched.h>
#endif

extern op_cpu cpu_type;

#ifndef HAVE_SCHED_SETAFFINITY

/* many glibc's are not yet up to date */
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 1231
#endif

/* Copied from glibc's <sched.h> and <bits/sched.h> and munged */
#define CPU_SETSIZE	1024
#define __NCPUBITS	(8 * sizeof (unsigned long))
typedef struct
{
	unsigned long __bits[CPU_SETSIZE / __NCPUBITS];
} cpu_set_t;

#define CPU_SET(cpu, cpusetp) \
	((cpusetp)->__bits[(cpu)/__NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS)))
#define CPU_ZERO(cpusetp) \
	memset((cpusetp), 0, sizeof(cpu_set_t))

static int
sched_setaffinity(pid_t pid, size_t len, cpu_set_t const * cpusetp)
{
	return syscall(__NR_sched_setaffinity, pid, len, cpusetp);
}
#endif


#ifndef HAVE_PERFMONCTL
#ifndef __NR_perfmonctl
#define __NR_perfmonctl 1175
#endif

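/* fall back to a raw syscall when glibc does not provide perfmonctl() */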
static int perfmonctl(int fd, int cmd, void * arg, int narg)
{
	return syscall(__NR_perfmonctl, fd, cmd, arg, narg);
}
#endif


static unsigned char uuid[16] = {
	0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69,
	0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c
};


static size_t nr_cpus;

struct child {
	pid_t pid;
	int up_pipe[2];
	int ctx_fd;
	sig_atomic_t sigusr1;
	sig_atomic_t sigusr2;
	sig_atomic_t sigterm;
};

static struct child * children;

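/** start the perfmon counters on the given context */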
static void perfmon_start_child(int ctx_fd)
{
	if (perfmonctl(ctx_fd, PFM_START, 0, 0) == -1) {
		perror("Couldn't start perfmon: ");
		exit(EXIT_FAILURE);
	}
}


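/** stop the perfmon counters on the given context */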
static void perfmon_stop_child(int ctx_fd)
{
	if (perfmonctl(ctx_fd, PFM_STOP, 0, 0) == -1) {
		perror("Couldn't stop perfmon: ");
		exit(EXIT_FAILURE);
	}
}


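/** SIGUSR1 handler: flag that this CPU's child should start counting */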
static void child_sigusr1(int val __attribute__((unused)))
{
	size_t i;

	for (i = 0; i < nr_cpus; ++i) {
		if (children[i].pid == getpid()) {
			children[i].sigusr1 = 1;
			return;
		}
	}
}


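/** SIGUSR2 handler: flag that this CPU's child should stop counting */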
static void child_sigusr2(int val __attribute__((unused)))
{
	size_t i;

	for (i = 0; i < nr_cpus; ++i) {
		if (children[i].pid == getpid()) {
			children[i].sigusr2 = 1;
			return;
		}
	}
}


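/** SIGTERM handler: pass the termination on to the parent daemon */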
static void child_sigterm(int val __attribute__((unused)))
{
	printf("Child received SIGTERM, killing parent.\n");
	kill(getppid(), SIGTERM);
}


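/** bind the calling process to the given CPU */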
static void set_affinity(size_t cpu)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);

	int err = sched_setaffinity(getpid(), sizeof(set), &set);

	if (err == -1) {
		fprintf(stderr, "Failed to set affinity: %s\n",
			    strerror(errno));
		exit(EXIT_FAILURE);
	}
}


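/** block SIGUSR1/SIGUSR2 and install the child's signal handlers */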
static void setup_signals(void)
{
	struct sigaction act;
	sigset_t mask;

	sigemptyset(&mask);
	sigaddset(&mask, SIGUSR1);
	sigaddset(&mask, SIGUSR2);
	sigprocmask(SIG_BLOCK, &mask, NULL);

	act.sa_handler = child_sigusr1;
	act.sa_flags = 0;
	sigemptyset(&act.sa_mask);

	if (sigaction(SIGUSR1, &act, NULL)) {
		perror("oprofiled: install of SIGUSR1 handler failed: ");
		exit(EXIT_FAILURE);
	}

	act.sa_handler = child_sigusr2;
	act.sa_flags = 0;
	sigemptyset(&act.sa_mask);

	if (sigaction(SIGUSR2, &act, NULL)) {
		perror("oprofiled: install of SIGUSR2 handler failed: ");
		exit(EXIT_FAILURE);
	}

	act.sa_handler = child_sigterm;
	act.sa_flags = 0;
	sigemptyset(&act.sa_mask);

	if (sigaction(SIGTERM, &act, NULL)) {
		perror("oprofiled: install of SIGTERM handler failed: ");
		exit(EXIT_FAILURE);
	}
}


/** create the per-cpu context */
static void create_context(struct child * self)
{
	pfarg_context_t ctx;
	int err;

	memset(&ctx, 0, sizeof(pfarg_context_t));
	memcpy(&ctx.ctx_smpl_buf_id, &uuid, 16);
	ctx.ctx_flags = PFM_FL_SYSTEM_WIDE;

	err = perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1);
	if (err == -1) {
		fprintf(stderr, "CREATE_CONTEXT failed: %s\n",
		        strerror(errno));
		exit(EXIT_FAILURE);
	}

	self->ctx_fd = ctx.ctx_fd;
}


/** program the perfmon counters */
static void write_pmu(struct child * self)
{
	pfarg_reg_t pc[OP_MAX_COUNTERS];
	pfarg_reg_t pd[OP_MAX_COUNTERS];
	int err;
	size_t i;

	memset(pc, 0, sizeof(pc));
	memset(pd, 0, sizeof(pd));

#define PMC_GEN_INTERRUPT (1UL << 5)
#define PMC_PRIV_MONITOR (1UL << 6)
/* McKinley requires pmc4 to have bit 23 set (enable PMU).
 * It is supposedly ignored in other pmc registers.
 */
#define PMC_MANDATORY (1UL << 23)
#define PMC_USER (1UL << 3)
#define PMC_KERNEL (1UL << 0)
	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
		struct opd_event * event = &opd_events[i];
		pc[i].reg_num = event->counter + 4;
		pc[i].reg_value = PMC_GEN_INTERRUPT;
		pc[i].reg_value |= PMC_PRIV_MONITOR;
		pc[i].reg_value |= PMC_MANDATORY;
		(event->user) ? (pc[i].reg_value |= PMC_USER)
		              : (pc[i].reg_value &= ~PMC_USER);
		(event->kernel) ? (pc[i].reg_value |= PMC_KERNEL)
		                : (pc[i].reg_value &= ~PMC_KERNEL);
		pc[i].reg_value &= ~(0xff << 8);
		pc[i].reg_value |= ((event->value & 0xff) << 8);
		pc[i].reg_value &= ~(0xf << 16);
		pc[i].reg_value |= ((event->um & 0xf) << 16);
		pc[i].reg_smpl_eventid = event->counter;
	}

	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
		struct opd_event * event = &opd_events[i];
		pd[i].reg_value = ~0UL - event->count + 1;
		pd[i].reg_short_reset = ~0UL - event->count + 1;
		pd[i].reg_num = event->counter + 4;
	}

	err = perfmonctl(self->ctx_fd, PFM_WRITE_PMCS, pc, i);
	if (err == -1) {
		perror("Couldn't write PMCs: ");
		exit(EXIT_FAILURE);
	}

	err = perfmonctl(self->ctx_fd, PFM_WRITE_PMDS, pd, i);
	if (err == -1) {
		perror("Couldn't write PMDs: ");
		exit(EXIT_FAILURE);
	}
}


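/** load the context onto the CPU this child is bound to */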
static void load_context(struct child * self)
{
	pfarg_load_t load_args;
	int err;

	memset(&load_args, 0, sizeof(load_args));
	load_args.load_pid = self->pid;

	err = perfmonctl(self->ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1);
	if (err == -1) {
		perror("Couldn't load context: ");
		exit(EXIT_FAILURE);
	}
}


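/** report this child's CPU number to the parent over the startup pipe */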
static void notify_parent(struct child * self, size_t cpu)
{
	for (;;) {
		ssize_t ret;
		ret = write(self->up_pipe[1], &cpu, sizeof(size_t));
		if (ret == sizeof(size_t))
			break;
		if (ret < 0 && errno != EINTR) {
			fprintf(stderr, "Failed to write child pipe with %s\n",
			        strerror(errno));
			exit(EXIT_FAILURE);
		}
	}
}


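/** per-CPU child: set up its perfmon context, then sleep waiting for start/stop signals */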
static void run_child(size_t cpu)
{
	struct child * self = &children[cpu];

	self->pid = getpid();
	self->sigusr1 = 0;
	self->sigusr2 = 0;
	self->sigterm = 0;

	setup_signals();

	set_affinity(cpu);

	create_context(self);

	write_pmu(self);

	load_context(self);

	notify_parent(self, cpu);

	for (;;) {
		sigset_t sigmask;
		sigfillset(&sigmask);
		sigdelset(&sigmask, SIGUSR1);
		sigdelset(&sigmask, SIGUSR2);
		sigdelset(&sigmask, SIGTERM);

		if (self->sigusr1) {
			printf("PFM_START on CPU%d\n", (int)cpu);
			fflush(stdout);
			perfmon_start_child(self->ctx_fd);
			self->sigusr1 = 0;
		}

		if (self->sigusr2) {
			printf("PFM_STOP on CPU%d\n", (int)cpu);
			fflush(stdout);
			perfmon_stop_child(self->ctx_fd);
			self->sigusr2 = 0;
		}

		sigsuspend(&sigmask);
	}
}


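/** wait for a child to report its CPU number over the startup pipe */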
static void wait_for_child(struct child * child)
{
	size_t tmp;
	for (;;) {
		ssize_t ret;
		ret = read(child->up_pipe[0], &tmp, sizeof(size_t));
		if (ret == sizeof(size_t))
			break;
		if (ret < 0 && errno != EINTR) {
			fprintf(stderr, "Failed to read child pipe with %s\n",
			        strerror(errno));
			exit(EXIT_FAILURE);
		}
	}
	printf("Perfmon child up on CPU%d\n", (int)tmp);
	fflush(stdout);

	close(child->up_pipe[0]);
	close(child->up_pipe[1]);
}

static struct child* xen_ctx;

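/** set up perfmon: a single context under Xen, otherwise one forked child per CPU */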
void perfmon_init(void)
{
	size_t i;
	long nr;

	if (cpu_type == CPU_TIMER_INT)
		return;

	if (!no_xen) {
		xen_ctx = xmalloc(sizeof(struct child));
		xen_ctx->pid = getpid();
		xen_ctx->up_pipe[0] = -1;
		xen_ctx->up_pipe[1] = -1;
		xen_ctx->sigusr1 = 0;
		xen_ctx->sigusr2 = 0;
		xen_ctx->sigterm = 0;

		create_context(xen_ctx);

		write_pmu(xen_ctx);

		load_context(xen_ctx);
		return;
	}


	nr = sysconf(_SC_NPROCESSORS_ONLN);
	if (nr == -1) {
		fprintf(stderr, "Couldn't determine number of CPUs.\n");
		exit(EXIT_FAILURE);
	}

	nr_cpus = nr;

	children = xmalloc(sizeof(struct child) * nr_cpus);

	for (i = 0; i < nr_cpus; ++i) {
		int ret;

		if (pipe(children[i].up_pipe)) {
			perror("Couldn't create child pipe.\n");
			exit(EXIT_FAILURE);
		}

		ret = fork();
		if (ret == -1) {
			fprintf(stderr, "Couldn't fork perfmon child.\n");
			exit(EXIT_FAILURE);
		} else if (ret == 0) {
			printf("Running perfmon child on CPU%d.\n", (int)i);
			fflush(stdout);
			run_child(i);
		} else {
			children[i].pid = ret;
			printf("Waiting on CPU%d\n", (int)i);
			wait_for_child(&children[i]);
		}
	}
}


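/** tear down perfmon: kill and reap the per-CPU children */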
void perfmon_exit(void)
{
	size_t i;

	if (cpu_type == CPU_TIMER_INT)
		return;

	if (!no_xen)
		return;

	for (i = 0; i < nr_cpus; ++i) {
		kill(children[i].pid, SIGKILL);
		waitpid(children[i].pid, NULL, 0);
	}
}


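/** start profiling: start the Xen context directly, or signal each child with SIGUSR1 */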
void perfmon_start(void)
{
	size_t i;

	if (cpu_type == CPU_TIMER_INT)
		return;

	if (!no_xen) {
		perfmon_start_child(xen_ctx->ctx_fd);
		return;
	}

	for (i = 0; i < nr_cpus; ++i)
		kill(children[i].pid, SIGUSR1);
}


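/** stop profiling: stop the Xen context directly, or signal each child with SIGUSR2 */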
void perfmon_stop(void)
{
	size_t i;

	if (cpu_type == CPU_TIMER_INT)
		return;

	if (!no_xen) {
		perfmon_stop_child(xen_ctx->ctx_fd);
		return;
	}

	for (i = 0; i < nr_cpus; ++i)
		kill(children[i].pid, SIGUSR2);
}

#endif /* __ia64__ */