/**
 * @file opd_perfmon.c
 * perfmonctl() handling
 *
 * @remark Copyright 2003 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 */

11 #ifdef __ia64__
12 
13 /* need this for sched_setaffinity() in <sched.h> */
14 #define _GNU_SOURCE
15 
16 #include "oprofiled.h"
17 #include "opd_perfmon.h"
18 #include "opd_events.h"
19 
20 #include "op_cpu_type.h"
21 #include "op_libiberty.h"
22 #include "op_hw_config.h"
23 
24 #include <sys/syscall.h>
25 #include <sys/wait.h>
26 #include <unistd.h>
27 #include <limits.h>
28 #include <signal.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <errno.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #ifdef HAVE_SCHED_SETAFFINITY
36 #include <sched.h>
37 #endif
38 
39 extern op_cpu cpu_type;
40 
#ifndef HAVE_SCHED_SETAFFINITY

/* many glibc's are not yet up to date */
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 1231
#endif

/* Copied from glibc's <sched.h> and <bits/sched.h> and munged */
#define CPU_SETSIZE	1024
#define __NCPUBITS	(8 * sizeof (unsigned long))
typedef struct
{
	unsigned long __bits[CPU_SETSIZE / __NCPUBITS];
} cpu_set_t;

#define CPU_SET(cpu, cpusetp) \
	((cpusetp)->__bits[(cpu)/__NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS)))
#define CPU_ZERO(cpusetp) \
	memset((cpusetp), 0, sizeof(cpu_set_t))

/* raw-syscall fallback for glibc versions lacking a wrapper */
static int
sched_setaffinity(pid_t pid, size_t len, cpu_set_t const * cpusetp)
{
	return syscall(__NR_sched_setaffinity, pid, len, cpusetp);
}
#endif


#ifndef HAVE_PERFMONCTL
#ifndef __NR_perfmonctl
#define __NR_perfmonctl 1175
#endif

/* perfmonctl() has no glibc wrapper; invoke the raw syscall */
static int perfmonctl(int fd, int cmd, void * arg, int narg)
{
	return syscall(__NR_perfmonctl, fd, cmd, arg, narg);
}
#endif


81 static unsigned char uuid[16] = {
82 	0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69,
83 	0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c
84 };
85 
86 
87 static size_t nr_cpus;
88 
89 struct child {
90 	pid_t pid;
91 	int up_pipe[2];
92 	int ctx_fd;
93 	sig_atomic_t sigusr1;
94 	sig_atomic_t sigusr2;
95 	sig_atomic_t sigterm;
96 };
97 
98 static struct child * children;
99 
perfmon_start_child(int ctx_fd)100 static void perfmon_start_child(int ctx_fd)
101 {
102 	if (perfmonctl(ctx_fd, PFM_START, 0, 0) == -1) {
103 		exit(EXIT_FAILURE);
104 	}
105 }
106 
107 
perfmon_stop_child(int ctx_fd)108 static void perfmon_stop_child(int ctx_fd)
109 {
110 	if (perfmonctl(ctx_fd, PFM_STOP, 0, 0) == -1) {
111 		exit(EXIT_FAILURE);
112 	}
113 }
114 
115 
child_sigusr1(int val)116 static void child_sigusr1(int val __attribute__((unused)))
117 {
118 	size_t i;
119 
120 	for (i = 0; i < nr_cpus; ++i) {
121 		if (children[i].pid == getpid()) {
122 			children[i].sigusr1 = 1;
123 			return;
124 		}
125 	}
126 }
127 
128 
child_sigusr2(int val)129 static void child_sigusr2(int val __attribute__((unused)))
130 {
131 	size_t i;
132 
133 	for (i = 0; i < nr_cpus; ++i) {
134 		if (children[i].pid == getpid()) {
135 			children[i].sigusr2 = 1;
136 			return;
137 		}
138 	}
139 }
140 
141 
child_sigterm(int val)142 static void child_sigterm(int val __attribute__((unused)))
143 {
144 	kill(getppid(), SIGTERM);
145 }
146 
147 
/* pin the calling process to the given CPU; exit on failure */
static void set_affinity(size_t cpu)
{
	cpu_set_t set;
	int err;

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);

	err = sched_setaffinity(getpid(), sizeof(set), &set);

	if (err == -1) {
		perror("Failed to set affinity");
		exit(EXIT_FAILURE);
	}
}


setup_signals(void)165 static void setup_signals(void)
166 {
167 	struct sigaction act;
168 	sigset_t mask;
169 
170 	sigemptyset(&mask);
171 	sigaddset(&mask, SIGUSR1);
172 	sigaddset(&mask, SIGUSR2);
173 	sigprocmask(SIG_BLOCK, &mask, NULL);
174 
175 	act.sa_handler = child_sigusr1;
176 	act.sa_flags = 0;
177 	sigemptyset(&act.sa_mask);
178 
179 	if (sigaction(SIGUSR1, &act, NULL)) {
180 		perror("oprofiled: install of SIGUSR1 handler failed");
181 		exit(EXIT_FAILURE);
182 	}
183 
184 	act.sa_handler = child_sigusr2;
185 	act.sa_flags = 0;
186 	sigemptyset(&act.sa_mask);
187 
188 	if (sigaction(SIGUSR2, &act, NULL)) {
189 		perror("oprofiled: install of SIGUSR2 handler failed");
190 		exit(EXIT_FAILURE);
191 	}
192 
193 	act.sa_handler = child_sigterm;
194 	act.sa_flags = 0;
195 	sigemptyset(&act.sa_mask);
196 
197 	if (sigaction(SIGTERM, &act, NULL)) {
198 		perror("oprofiled: install of SIGTERM handler failed");
199 		exit(EXIT_FAILURE);
200 	}
201 }
202 
203 
204 /** create the per-cpu context */
create_context(struct child * self)205 static void create_context(struct child * self)
206 {
207 	pfarg_context_t ctx;
208 	int err;
209 
210 	memset(&ctx, 0, sizeof(pfarg_context_t));
211 	memcpy(&ctx.ctx_smpl_buf_id, &uuid, 16);
212 	ctx.ctx_flags = PFM_FL_SYSTEM_WIDE;
213 
214 	err = perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1);
215 	if (err == -1) {
216 		perror("CREATE_CONTEXT failed");
217 		exit(EXIT_FAILURE);
218 	}
219 
220 	self->ctx_fd = ctx.ctx_fd;
221 }
222 
223 
224 /** program the perfmon counters */
write_pmu(struct child * self)225 static void write_pmu(struct child * self)
226 {
227 	pfarg_reg_t pc[OP_MAX_COUNTERS];
228 	pfarg_reg_t pd[OP_MAX_COUNTERS];
229 	int err;
230 	size_t i;
231 
232 	memset(pc, 0, sizeof(pc));
233 	memset(pd, 0, sizeof(pd));
234 
235 #define PMC_GEN_INTERRUPT (1UL << 5)
236 #define PMC_PRIV_MONITOR (1UL << 6)
237 /* McKinley requires pmc4 to have bit 23 set (enable PMU).
238  * It is supposedly ignored in other pmc registers.
239  */
240 #define PMC_MANDATORY (1UL << 23)
241 #define PMC_USER (1UL << 3)
242 #define PMC_KERNEL (1UL << 0)
243 	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
244 		struct opd_event * event = &opd_events[i];
245 		pc[i].reg_num = event->counter + 4;
246 		pc[i].reg_value = PMC_GEN_INTERRUPT;
247 		pc[i].reg_value |= PMC_PRIV_MONITOR;
248 		pc[i].reg_value |= PMC_MANDATORY;
249 		(event->user) ? (pc[i].reg_value |= PMC_USER)
250 		              : (pc[i].reg_value &= ~PMC_USER);
251 		(event->kernel) ? (pc[i].reg_value |= PMC_KERNEL)
252 		                : (pc[i].reg_value &= ~PMC_KERNEL);
253 		pc[i].reg_value &= ~(0xff << 8);
254 		pc[i].reg_value |= ((event->value & 0xff) << 8);
255 		pc[i].reg_value &= ~(0xf << 16);
256 		pc[i].reg_value |= ((event->um & 0xf) << 16);
257 		pc[i].reg_smpl_eventid = event->counter;
258 	}
259 
260 	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
261 		struct opd_event * event = &opd_events[i];
262 		pd[i].reg_value = ~0UL - event->count + 1;
263 		pd[i].reg_short_reset = ~0UL - event->count + 1;
264 		pd[i].reg_num = event->counter + 4;
265 	}
266 
267 	err = perfmonctl(self->ctx_fd, PFM_WRITE_PMCS, pc, i);
268 	if (err == -1) {
269 		perror("Couldn't write PMCs");
270 		exit(EXIT_FAILURE);
271 	}
272 
273 	err = perfmonctl(self->ctx_fd, PFM_WRITE_PMDS, pd, i);
274 	if (err == -1) {
275 		perror("Couldn't write PMDs");
276 		exit(EXIT_FAILURE);
277 	}
278 }
279 
280 
load_context(struct child * self)281 static void load_context(struct child * self)
282 {
283 	pfarg_load_t load_args;
284 	int err;
285 
286 	memset(&load_args, 0, sizeof(load_args));
287 	load_args.load_pid = self->pid;
288 
289 	err = perfmonctl(self->ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1);
290 	if (err == -1) {
291 		perror("Couldn't load context");
292 		exit(EXIT_FAILURE);
293 	}
294 }
295 
296 
/* tell the parent we are up by writing our CPU number up the pipe;
 * retries on EINTR */
static void notify_parent(struct child * self, size_t cpu)
{
	for (;;) {
		ssize_t ret;
		ret = write(self->up_pipe[1], &cpu, sizeof(size_t));
		if (ret == sizeof(size_t))
			break;
		if (ret < 0 && errno != EINTR) {
			/* no trailing ':' -- perror() appends one itself */
			perror("Failed to write child pipe");
			exit(EXIT_FAILURE);
		}
	}
}

311 static struct child * inner_child;
close_pipe(void)312 void close_pipe(void)
313 {
314 	close(inner_child->up_pipe[1]);
315 }
316 
/* per-CPU child main: set up signals and CPU affinity, create and
 * program the perfmon context, notify the parent we are ready, then
 * service start/stop requests forever.  Never returns. */
static void run_child(size_t cpu)
{
	struct child * self = &children[cpu];

	self->pid = getpid();
	self->sigusr1 = 0;
	self->sigusr2 = 0;
	self->sigterm = 0;

	inner_child = self;
	if (atexit(close_pipe)){
		close_pipe();
		exit(EXIT_FAILURE);
	}

	umask(0);
	/* Change directory to allow directory to be removed */
	if (chdir("/") < 0) {
		perror("Unable to chdir to \"/\"");
		exit(EXIT_FAILURE);
	}

	setup_signals();

	set_affinity(cpu);

	create_context(self);

	write_pmu(self);

	load_context(self);

	notify_parent(self, cpu);

	/* Redirect standard files to /dev/null (best-effort; results
	 * deliberately ignored) */
	freopen( "/dev/null", "r", stdin);
	freopen( "/dev/null", "w", stdout);
	freopen( "/dev/null", "w", stderr);

	for (;;) {
		sigset_t sigmask;
		sigfillset(&sigmask);
		sigdelset(&sigmask, SIGUSR1);
		sigdelset(&sigmask, SIGUSR2);
		sigdelset(&sigmask, SIGTERM);

		/* SIGUSR1/2 stay blocked outside sigsuspend(), so these
		 * flags are stable while we test them */
		if (self->sigusr1) {
			perfmon_start_child(self->ctx_fd);
			self->sigusr1 = 0;
		}

		if (self->sigusr2) {
			perfmon_stop_child(self->ctx_fd);
			self->sigusr2 = 0;
		}

		sigsuspend(&sigmask);
	}
}


wait_for_child(struct child * child)378 static void wait_for_child(struct child * child)
379 {
380 	size_t tmp;
381 	for (;;) {
382 		ssize_t ret;
383 		ret = read(child->up_pipe[0], &tmp, sizeof(size_t));
384 		if (ret == sizeof(size_t))
385 			break;
386 		if ((ret < 0 && errno != EINTR) || ret == 0 ) {
387 			perror("Failed to read child pipe");
388 			exit(EXIT_FAILURE);
389 		}
390 	}
391 	printf("Perfmon child up on CPU%d\n", (int)tmp);
392 	fflush(stdout);
393 
394 	close(child->up_pipe[0]);
395 }
396 
397 static struct child* xen_ctx;
398 
perfmon_init(void)399 void perfmon_init(void)
400 {
401 	size_t i;
402 	long nr;
403 
404 	if (cpu_type == CPU_TIMER_INT)
405 		return;
406 
407 	if (!no_xen) {
408 		xen_ctx = xmalloc(sizeof(struct child));
409 		xen_ctx->pid = getpid();
410 		xen_ctx->up_pipe[0] = -1;
411 		xen_ctx->up_pipe[1] = -1;
412 		xen_ctx->sigusr1 = 0;
413 		xen_ctx->sigusr2 = 0;
414 		xen_ctx->sigterm = 0;
415 
416 		create_context(xen_ctx);
417 
418 		write_pmu(xen_ctx);
419 
420 		load_context(xen_ctx);
421 		return;
422 	}
423 
424 
425 	nr = sysconf(_SC_NPROCESSORS_ONLN);
426 	if (nr == -1) {
427 		fprintf(stderr, "Couldn't determine number of CPUs.\n");
428 		exit(EXIT_FAILURE);
429 	}
430 
431 	nr_cpus = nr;
432 
433 	children = xmalloc(sizeof(struct child) * nr_cpus);
434 	bzero(children, sizeof(struct child) * nr_cpus);
435 
436 	for (i = 0; i < nr_cpus; ++i) {
437 		int ret;
438 
439 		if (pipe(children[i].up_pipe)) {
440 			perror("Couldn't create child pipe");
441 			exit(EXIT_FAILURE);
442 		}
443 
444 		ret = fork();
445 		if (ret == -1) {
446 			perror("Couldn't fork perfmon child");
447 			exit(EXIT_FAILURE);
448 		} else if (ret == 0) {
449 			close(children[i].up_pipe[0]);
450 			run_child(i);
451 		} else {
452 			children[i].pid = ret;
453 			close(children[i].up_pipe[1]);
454 			printf("Waiting on CPU%d\n", (int)i);
455 			wait_for_child(&children[i]);
456 		}
457 	}
458 }
459 
460 
perfmon_exit(void)461 void perfmon_exit(void)
462 {
463 	size_t i;
464 
465 	if (cpu_type == CPU_TIMER_INT)
466 		return;
467 
468 	if (!no_xen)
469 		return;
470 
471 	for (i = 0; i < nr_cpus; ++i) {
472 		if (children[i].pid) {
473 			int c_pid = children[i].pid;
474 			children[i].pid = 0;
475 			if (kill(c_pid, SIGKILL)==0)
476 				waitpid(c_pid, NULL, 0);
477 		}
478 	}
479 }
480 
481 
perfmon_start(void)482 void perfmon_start(void)
483 {
484 	size_t i;
485 
486 	if (cpu_type == CPU_TIMER_INT)
487 		return;
488 
489 	if (!no_xen) {
490 		perfmon_start_child(xen_ctx->ctx_fd);
491 		return;
492 	}
493 
494 	for (i = 0; i < nr_cpus; ++i) {
495 		if (kill(children[i].pid, SIGUSR1)) {
496 			perror("Unable to start perfmon");
497 			exit(EXIT_FAILURE);
498 		}
499 	}
500 }
501 
502 
perfmon_stop(void)503 void perfmon_stop(void)
504 {
505 	size_t i;
506 
507 	if (cpu_type == CPU_TIMER_INT)
508 		return;
509 
510 	if (!no_xen) {
511 		perfmon_stop_child(xen_ctx->ctx_fd);
512 		return;
513 	}
514 
515 	for (i = 0; i < nr_cpus; ++i)
516 		if (kill(children[i].pid, SIGUSR2)) {
517 			perror("Unable to stop perfmon");
518 			exit(EXIT_FAILURE);
519 		}
520 }
521 
522 #endif /* __ia64__ */
523