// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2009 Paul Mackerras <paulus@samba.org>
 * Copyright (c) 2019 Linux Test Project
 */
/*
 * Here's a little test program that checks whether software counters
 * (specifically, the task clock counter) work correctly when they're in
 * a group with hardware counters.
 *
 * What it does is to create several groups, each with one hardware
 * counter, counting instructions, plus a task clock counter.  It needs
 * to know an upper bound N on the number of hardware counters you have
 * (N defaults to 8), and it creates N+4 groups to force them to be
 * multiplexed.  It also creates an overall task clock counter.
 *
 * Then it spins for a while, and then stops all the counters and reads
 * them.  It takes the total of the task clock counters in the groups and
 * computes the ratio of that total to the overall execution time from
 * the overall task clock counter.
 *
 * That ratio should be equal to the number of actual hardware counters
 * that can count instructions.  If the task clock counters in the groups
 * don't stop when their group gets taken off the PMU, the ratio will
 * instead be close to N+4.  The program will declare that the test fails
 * if the ratio is greater than N (actually, N + 0.0001 to allow for FP
 * rounding errors).
 */
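
/*
 * A worked example with illustrative numbers: in this LTP version the
 * number of usable hardware counters, nhw, is detected at run time by
 * count_hardware_counters() rather than passed in as N, and ntotal =
 * nhw + 4 groups are created (s390 uses a single group instead).
 * Suppose nhw = 6: ten groups are created but at most six can be on the
 * PMU at once, so if the grouped task clock counters stop while their
 * group is scheduled out, their sum is about 6 times the overall task
 * clock (ratio ~6, pass); if they keep running, the ratio approaches 10
 * and the test fails.
 */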

#define _GNU_SOURCE
#include <errno.h>
#include <inttypes.h>
#include <sched.h>
#include <signal.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/time.h>
#include <sys/types.h>

#include "config.h"
#include "tst_test.h"
#include "lapi/cpuset.h"
#include "lapi/syscalls.h"

#if HAVE_PERF_EVENT_ATTR
#include <linux/types.h>
#include <linux/perf_event.h>

#define MAX_CTRS	1000

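/*
 * Layout of the record returned by read() on a single event fd opened with
 * read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING,
 * as described in perf_event_open(2).
 */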
struct read_format {
	unsigned long long value;
	/* if PERF_FORMAT_TOTAL_TIME_ENABLED */
	unsigned long long time_enabled;
	/* if PERF_FORMAT_TOTAL_TIME_RUNNING */
	unsigned long long time_running;
};

static char *verbose;
static struct tst_option options[] = {
	{"v", &verbose, "-v\tverbose output"},
	{NULL, NULL, NULL},
};

static int ntotal, nhw;
static int tsk0 = -1, hwfd[MAX_CTRS], tskfd[MAX_CTRS];
static int volatile work_done;
static unsigned int est_loops;

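/*
 * Thin wrapper around the perf_event_open() syscall: returns the new event
 * fd on success; on failure it reports the event type/config and exits the
 * test with TCONF (ENOENT/ENODEV, i.e. event not supported) or TBROK.
 */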
static int perf_event_open(struct perf_event_attr *event, pid_t pid,
	int cpu, int group_fd, unsigned long flags)
{
	int ret;

	ret = tst_syscall(__NR_perf_event_open, event, pid, cpu,
		group_fd, flags);

	if (ret != -1)
		return ret;

	tst_res(TINFO, "perf_event_open event.type: %"PRIu32
		", event.config: %"PRIu64, (uint32_t)event->type,
		(uint64_t)event->config);
	if (errno == ENOENT || errno == ENODEV) {
		tst_brk(TCONF | TERRNO,
			"perf_event_open type/config not supported");
	}
	tst_brk(TBROK | TERRNO, "perf_event_open failed");

	/* unreachable */
	return -1;
}

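/*
 * Enable or disable every perf event counter attached to the calling task;
 * state is PR_TASK_PERF_EVENTS_ENABLE or PR_TASK_PERF_EVENTS_DISABLE
 * (see prctl(2)).
 */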
static void all_counters_set(int state)
{
	if (prctl(state) == -1)
		tst_brk(TBROK | TERRNO, "prctl(%d) failed", state);
}

static void alarm_handler(int sig LTP_ATTRIBUTE_UNUSED)
{
	work_done = 1;
}

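/*
 * Calibration: spin in the busy loop for time_ms milliseconds under a
 * one-shot ITIMER_REAL alarm and record in est_loops how many outer
 * iterations completed, so that do_work() can scale its workload later.
 */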
static void bench_work(int time_ms)
{
	unsigned int i;
	struct itimerval val;
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = alarm_handler;
	sa.sa_flags = SA_RESETHAND;
	SAFE_SIGACTION(SIGALRM, &sa, NULL);

	work_done = 0;
	memset(&val, 0, sizeof(val));
	val.it_value.tv_sec = time_ms / 1000;
	val.it_value.tv_usec = (time_ms % 1000) * 1000;

	if (setitimer(ITIMER_REAL, &val, NULL))
		tst_brk(TBROK | TERRNO, "setitimer");

	while (!work_done) {
		for (i = 0; i < 100000; ++i)
			asm volatile (""::"g" (i));
		est_loops++;
	}

	tst_res(TINFO, "bench_work estimated loops = %u in %d ms", est_loops, time_ms);
}

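/* Spin for roughly mult times the interval calibrated by bench_work(). */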
static void do_work(int mult)
{
	unsigned long i, j, loops = mult * est_loops;

	for (j = 0; j < loops; j++)
		for (i = 0; i < 100000; i++)
			asm volatile (""::"g" (i));
}

#ifndef __s390__
static int count_hardware_counters(void)
{
	struct perf_event_attr hw_event;
	int i, hwctrs = 0;
	int fdarry[MAX_CTRS];
	struct read_format buf, buf2, diff;

	memset(&hw_event, 0, sizeof(struct perf_event_attr));

	hw_event.type = PERF_TYPE_HARDWARE;
	hw_event.size = sizeof(struct perf_event_attr);
	hw_event.disabled = 1;
	hw_event.config =  PERF_COUNT_HW_INSTRUCTIONS;
	hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
		PERF_FORMAT_TOTAL_TIME_RUNNING;

	for (i = 0; i < MAX_CTRS; i++) {
		fdarry[i] = perf_event_open(&hw_event, 0, -1, -1, 0);

		all_counters_set(PR_TASK_PERF_EVENTS_ENABLE);
		do_work(1);
		if (read(fdarry[i], &buf, sizeof(buf)) != sizeof(buf))
			tst_brk(TBROK | TERRNO, "error reading counter(s) #1");
		do_work(1);
		all_counters_set(PR_TASK_PERF_EVENTS_DISABLE);
		if (read(fdarry[i], &buf2, sizeof(buf2)) != sizeof(buf2))
			tst_brk(TBROK | TERRNO, "error reading counter(s) #2");

		diff.value = buf2.value - buf.value;
		diff.time_enabled = buf2.time_enabled - buf.time_enabled;
		diff.time_running = buf2.time_running - buf.time_running;

		tst_res(TINFO, "[%d] value:%lld time_enabled:%lld "
		       "time_running:%lld", i, diff.value,
		       diff.time_enabled, diff.time_running);

		/*
		 * Normally time_enabled and time_running have the same value.
		 * But once more events are opened than there are counter
		 * slots on the PMU, the events get multiplexed and each one
		 * runs only part of the time, so time_enabled and
		 * time_running start to differ (their ratio is what would be
		 * used to scale the raw count into an estimate).  So as soon
		 * as the two values differ, multiplexing has started, and
		 * the index of the event that first shows the difference is
		 * taken as the number of usable hardware counters.
		 */
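		/*
		 * (For reference, a multiplexed count is normally scaled as
		 *  value * time_enabled / time_running; this test only needs
		 *  to detect that the two times differ.)
		 */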
		if (diff.time_enabled != diff.time_running) {
			hwctrs = i;
			break;
		}
	}

	for (i = 0; i <= hwctrs; i++)
		SAFE_CLOSE(fdarry[i]);

	return hwctrs;
}
#endif /* __s390__ */

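/*
 * Pin the process to the CPU it is currently running on (a no-op when
 * sched_getcpu() is unavailable), so the event groups stay on a single
 * CPU's PMU for the whole measurement.
 */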
static void bind_to_current_cpu(void)
{
#ifdef HAVE_SCHED_GETCPU
	int cpu = sched_getcpu();
	size_t mask_size;
	cpu_set_t *mask;

	if (cpu == -1)
		tst_brk(TBROK | TERRNO, "sched_getcpu() failed");

	mask = CPU_ALLOC(cpu + 1);
	mask_size = CPU_ALLOC_SIZE(cpu + 1);
	CPU_ZERO_S(mask_size, mask);
	CPU_SET(cpu, mask);
	if (sched_setaffinity(0, mask_size, mask) == -1)
		tst_brk(TBROK | TERRNO, "sched_setaffinity() failed");
	CPU_FREE(mask);
#endif
}

static void setup(void)
{
	int i;
	struct perf_event_attr tsk_event, hw_event;

	for (i = 0; i < MAX_CTRS; i++) {
		hwfd[i] = -1;
		tskfd[i] = -1;
	}

	bench_work(500);

	/*
	 * According to perf_event_open's manpage, the official way of
	 * knowing if perf_event_open() support is enabled is checking for
	 * the existence of the file /proc/sys/kernel/perf_event_paranoid.
	 */
	if (access("/proc/sys/kernel/perf_event_paranoid", F_OK) == -1)
		tst_brk(TCONF, "Kernel doesn't have perf_event support");

	bind_to_current_cpu();
#ifdef __s390__
	/*
	 * On s390 the "time_enabled" and "time_running" values are always the
	 * same, therefore count_hardware_counters() does not work.
	 *
	 * There are distinct/dedicated counters that can be used independently.
	 * Use the dedicated counter for instructions here.
	 */
	ntotal = nhw = 1;
#else
	nhw = count_hardware_counters();
	ntotal = nhw + 4;
#endif

	memset(&hw_event, 0, sizeof(struct perf_event_attr));
	memset(&tsk_event, 0, sizeof(struct perf_event_attr));

	tsk_event.type =  PERF_TYPE_SOFTWARE;
	tsk_event.size = sizeof(struct perf_event_attr);
	tsk_event.disabled = 1;
	tsk_event.config = PERF_COUNT_SW_TASK_CLOCK;

	hw_event.type = PERF_TYPE_HARDWARE;
	hw_event.size = sizeof(struct perf_event_attr);
	hw_event.disabled = 1;
	hw_event.config =  PERF_COUNT_HW_INSTRUCTIONS;

	tsk0 = perf_event_open(&tsk_event, 0, -1, -1, 0);
	tsk_event.disabled = 0;
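	/*
	 * Create ntotal groups: each group has a hardware instruction
	 * counter as its leader and a task clock counter attached to it
	 * via group_fd.
	 */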
	for (i = 0; i < ntotal; ++i) {
		hwfd[i] = perf_event_open(&hw_event, 0, -1, -1, 0);
		tskfd[i] = perf_event_open(&tsk_event, 0, -1, hwfd[i], 0);
	}
}

static void cleanup(void)
{
	int i;

	for (i = 0; i < ntotal; i++) {
		if (hwfd[i] != -1)
			SAFE_CLOSE(hwfd[i]);
		if (tskfd[i] != -1)
			SAFE_CLOSE(tskfd[i]);
	}

	if (tsk0 != -1)
		SAFE_CLOSE(tsk0);
}

static void verify(void)
{
	unsigned long long vt0, vt[MAX_CTRS], vh[MAX_CTRS];
	unsigned long long vtsum = 0, vhsum = 0;
	int i;
	double ratio;
	struct sched_param sparam = {.sched_priority = 1};

	if (sched_setscheduler(0, SCHED_FIFO, &sparam)) {
		tst_brk(TBROK | TERRNO,
			"sched_setscheduler(0, SCHED_FIFO, ...) failed");
	}

	all_counters_set(PR_TASK_PERF_EVENTS_ENABLE);
	do_work(8);
	/* stop the per-group counters before the overall task clock tsk0 */
	for (i = 0; i < ntotal; i++) {
		ioctl(hwfd[i], PERF_EVENT_IOC_DISABLE);
		ioctl(tskfd[i], PERF_EVENT_IOC_DISABLE);
	}
	all_counters_set(PR_TASK_PERF_EVENTS_DISABLE);

	sparam.sched_priority = 0;
	if (sched_setscheduler(0, SCHED_OTHER, &sparam)) {
		tst_brk(TBROK | TERRNO,
			"sched_setscheduler(0, SCHED_OTHER, ...) failed");
	}

	if (read(tsk0, &vt0, sizeof(vt0)) != sizeof(vt0))
		tst_brk(TBROK | TERRNO, "error reading task clock counter");

	for (i = 0; i < ntotal; ++i) {
		if (read(tskfd[i], &vt[i], sizeof(vt[i])) != sizeof(vt[i]) ||
		    read(hwfd[i], &vh[i], sizeof(vh[i])) != sizeof(vh[i]))
			tst_brk(TBROK | TERRNO, "error reading counter(s)");
		vtsum += vt[i];
		vhsum += vh[i];
	}

	tst_res(TINFO, "nhw: %d, overall task clock: %llu", nhw, vt0);
	tst_res(TINFO, "hw sum: %llu, task clock sum: %llu", vhsum, vtsum);

	if (verbose) {
		tst_res(TINFO, "hw counters:");
		for (i = 0; i < ntotal; ++i)
			tst_res(TINFO, " %llu", vh[i]);
		tst_res(TINFO, "task clock counters:");
		for (i = 0; i < ntotal; ++i)
			tst_res(TINFO, " %llu", vt[i]);
	}

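	/*
	 * The grouped task clock counters should only run while their group
	 * is on the PMU, so their sum should be roughly nhw times the
	 * overall task clock vt0; if they kept running while scheduled out,
	 * the sum would approach ntotal * vt0 and the check below fails.
	 */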
	ratio = (double)vtsum / vt0;
	tst_res(TINFO, "ratio: %lf", ratio);
	if (ratio > nhw + 0.0001) {
		tst_res(TFAIL, "test failed (ratio was greater than %d)", nhw);
	} else {
		tst_res(TPASS, "test passed");
	}
}

static struct tst_test test = {
	.setup = setup,
	.cleanup = cleanup,
	.options = options,
	.test_all = verify,
	.needs_root = 1,
};

#else /* HAVE_PERF_EVENT_ATTR */
TST_TEST_TCONF("This system doesn't have <linux/perf_event.h> or "
	"struct perf_event_attr is not defined.");
#endif