1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2009 Paul Mackerras <paulus@samba.org>
4 * Copyright (c) 2019 Linux Test Project
5 */
6 /*
7 * Here's a little test program that checks whether software counters
8 * (specifically, the task clock counter) work correctly when they're in
9 * a group with hardware counters.
10 *
 * It creates several groups, each with one hardware counter (counting
 * instructions) plus a task clock counter. It first determines the
 * number N of hardware counters that can count simultaneously, by
 * opening counters one at a time until multiplexing is observed, and
 * then creates N+4 groups to force them to be multiplexed (on s390,
 * N is fixed to 1 and a single group is created). It also creates an
 * overall task clock counter.
16 *
17 * Then it spins for a while, and then stops all the counters and reads
18 * them. It takes the total of the task clock counters in the groups and
19 * computes the ratio of that total to the overall execution time from
20 * the overall task clock counter.
21 *
22 * That ratio should be equal to the number of actual hardware counters
23 * that can count instructions. If the task clock counters in the groups
24 * don't stop when their group gets taken off the PMU, the ratio will
25 * instead be close to N+4. The program will declare that the test fails
26 * if the ratio is greater than N (actually, N + 0.0001 to allow for FP
27 * rounding errors).
28 */
29
30 #define _GNU_SOURCE
31 #include <errno.h>
32 #include <sched.h>
33 #include <signal.h>
34 #include <stddef.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <unistd.h>
39 #include <sys/prctl.h>
40 #include <sys/time.h>
41 #include <sys/types.h>
42
43 #include "config.h"
44 #include "tst_test.h"
45 #include "lapi/cpuset.h"
46 #include "lapi/syscalls.h"
47
48 #if HAVE_PERF_EVENT_ATTR
49 #include "perf_event_open.h"
50
/*
 * Capacity of the per-group descriptor/value arrays and upper limit of the
 * hardware counter probing loop.
 */
#define MAX_CTRS 1000
52
/*
 * Buffer layout used when read()ing a counter that was opened with
 * PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING.
 */
struct read_format {
	unsigned long long value;		/* counter value */
	unsigned long long time_enabled;	/* if PERF_FORMAT_TOTAL_TIME_ENABLED */
	unsigned long long time_running;	/* if PERF_FORMAT_TOTAL_TIME_RUNNING */
};
60
61 static char *verbose;
62
63 static int ntotal, nhw;
64 static int tsk0 = -1, hwfd[MAX_CTRS], tskfd[MAX_CTRS];
65 static int volatile work_done;
66 static unsigned int est_loops;
67
all_counters_set(int state)68 static void all_counters_set(int state)
69 {
70 if (prctl(state) == -1)
71 tst_brk(TBROK | TERRNO, "prctl(%d) failed", state);
72 }
73
/*
 * SIGALRM handler installed by bench_work(): raises the work_done flag so
 * that the calibration loop terminates. The signal number is not used.
 */
static void alarm_handler(int sig LTP_ATTRIBUTE_UNUSED)
{
	work_done = 1;
}
78
bench_work(int time_ms)79 static void bench_work(int time_ms)
80 {
81 unsigned int i;
82 struct itimerval val;
83 struct sigaction sa;
84
85 memset(&sa, 0, sizeof(sa));
86 sa.sa_handler = alarm_handler;
87 sa.sa_flags = SA_RESETHAND;
88 SAFE_SIGACTION(SIGALRM, &sa, NULL);
89
90 work_done = 0;
91 memset(&val, 0, sizeof(val));
92 val.it_value.tv_sec = time_ms / 1000;
93 val.it_value.tv_usec = (time_ms % 1000) * 1000;
94
95 if (setitimer(ITIMER_REAL, &val, NULL))
96 tst_brk(TBROK | TERRNO, "setitimer");
97
98 while (!work_done) {
99 for (i = 0; i < 100000; ++i)
100 asm volatile (""::"g" (i));
101 est_loops++;
102 }
103
104 tst_res(TINFO, "bench_work estimated loops = %u in %d ms", est_loops, time_ms);
105 }
106
do_work(int mult)107 static void do_work(int mult)
108 {
109 unsigned long i, j, loops = mult * est_loops;
110
111 for (j = 0; j < loops; j++)
112 for (i = 0; i < 100000; i++)
113 asm volatile (""::"g" (i));
114 }
115
116 #ifndef __s390__
count_hardware_counters(void)117 static int count_hardware_counters(void)
118 {
119 struct perf_event_attr hw_event;
120 int i, hwctrs = 0;
121 int fdarry[MAX_CTRS];
122 struct read_format buf, buf2, diff;
123
124 memset(&hw_event, 0, sizeof(struct perf_event_attr));
125
126 hw_event.type = PERF_TYPE_HARDWARE;
127 hw_event.size = sizeof(struct perf_event_attr);
128 hw_event.disabled = 1;
129 hw_event.config = PERF_COUNT_HW_INSTRUCTIONS;
130 hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
131 PERF_FORMAT_TOTAL_TIME_RUNNING;
132
133 for (i = 0; i < MAX_CTRS; i++) {
134 fdarry[i] = perf_event_open(&hw_event, 0, -1, -1, 0);
135
136 all_counters_set(PR_TASK_PERF_EVENTS_ENABLE);
137 do_work(1);
138 if (read(fdarry[i], &buf, sizeof(buf)) != sizeof(buf))
139 tst_brk(TBROK | TERRNO, "error reading counter(s) #1");
140 do_work(1);
141 all_counters_set(PR_TASK_PERF_EVENTS_DISABLE);
142 if (read(fdarry[i], &buf2, sizeof(buf2)) != sizeof(buf2))
143 tst_brk(TBROK | TERRNO, "error reading counter(s) #2");
144
145 diff.value = buf2.value - buf.value;
146 diff.time_enabled = buf2.time_enabled - buf.time_enabled;
147 diff.time_running = buf2.time_running - buf.time_running;
148
149 tst_res(TINFO, "[%d] value:%lld time_enabled:%lld "
150 "time_running:%lld", i, diff.value,
151 diff.time_enabled, diff.time_running);
152
153 /*
154 * Normally time_enabled and time_running are the same value.
155 * But if more events are started than available counter slots
156 * on the PMU, then multiplexing happens and events run only
157 * part of the time. Time_enabled and time_running's values
158 * will be different. In this case the time_enabled and time_
159 * running values can be used to scale an estimated value for
160 * the count. So if buf.time_enabled and buf.time_running are
161 * not equal, we can think that PMU hardware counters
162 * multiplexing happens and the number of the opened events
163 * are the number of max available hardware counters.
164 */
165 if (diff.time_enabled != diff.time_running) {
166 hwctrs = i;
167 break;
168 }
169 }
170
171 for (i = 0; i <= hwctrs; i++)
172 SAFE_CLOSE(fdarry[i]);
173
174 return hwctrs;
175 }
176 #endif /* __s390__ */
177
/*
 * Pin the calling task to the CPU it is currently running on.
 *
 * Compiled to a no-op when HAVE_SCHED_GETCPU is not defined. Any failure is
 * reported through tst_brk(TBROK | TERRNO).
 */
static void bind_to_current_cpu(void)
{
#ifdef HAVE_SCHED_GETCPU
	int cpu = sched_getcpu();
	size_t mask_size;
	cpu_set_t *mask;

	if (cpu == -1)
		tst_brk(TBROK | TERRNO, "sched_getcpu() failed");

	/* The set only needs to be large enough to hold bit number 'cpu'. */
	mask = CPU_ALLOC(cpu + 1);
	if (!mask)
		tst_brk(TBROK | TERRNO, "CPU_ALLOC(%d) failed", cpu + 1);

	mask_size = CPU_ALLOC_SIZE(cpu + 1);
	CPU_ZERO_S(mask_size, mask);
	/*
	 * A dynamically sized set must be accessed with the _S macros;
	 * plain CPU_SET() assumes a fixed-size cpu_set_t.
	 */
	CPU_SET_S(cpu, mask_size, mask);
	if (sched_setaffinity(0, mask_size, mask) == -1)
		tst_brk(TBROK | TERRNO, "sched_setaffinity() failed");
	CPU_FREE(mask);
#endif
}
197
setup(void)198 static void setup(void)
199 {
200 int i;
201 struct perf_event_attr tsk_event, hw_event;
202
203 for (i = 0; i < MAX_CTRS; i++) {
204 hwfd[i] = -1;
205 tskfd[i] = -1;
206 }
207
208 bench_work(500);
209
210 /*
211 * According to perf_event_open's manpage, the official way of
212 * knowing if perf_event_open() support is enabled is checking for
213 * the existence of the file /proc/sys/kernel/perf_event_paranoid.
214 */
215 if (access("/proc/sys/kernel/perf_event_paranoid", F_OK) == -1)
216 tst_brk(TCONF, "Kernel doesn't have perf_event support");
217
218 bind_to_current_cpu();
219 #ifdef __s390__
220 /*
221 * On s390 the "time_enabled" and "time_running" values are always the
222 * same, therefore count_hardware_counters() does not work.
223 *
224 * There are distinct/dedicated counters that can be used independently.
225 * Use the dedicated counter for instructions here.
226 */
227 ntotal = nhw = 1;
228 #else
229 nhw = count_hardware_counters();
230 ntotal = nhw + 4;
231 #endif
232
233 memset(&hw_event, 0, sizeof(struct perf_event_attr));
234 memset(&tsk_event, 0, sizeof(struct perf_event_attr));
235
236 tsk_event.type = PERF_TYPE_SOFTWARE;
237 tsk_event.size = sizeof(struct perf_event_attr);
238 tsk_event.disabled = 1;
239 tsk_event.config = PERF_COUNT_SW_TASK_CLOCK;
240
241 hw_event.type = PERF_TYPE_HARDWARE;
242 hw_event.size = sizeof(struct perf_event_attr);
243 hw_event.disabled = 1;
244 hw_event.config = PERF_COUNT_HW_INSTRUCTIONS;
245
246 tsk0 = perf_event_open(&tsk_event, 0, -1, -1, 0);
247 tsk_event.disabled = 0;
248 for (i = 0; i < ntotal; ++i) {
249 hwfd[i] = perf_event_open(&hw_event, 0, -1, -1, 0);
250 tskfd[i] = perf_event_open(&tsk_event, 0, -1, hwfd[i], 0);
251 }
252 }
253
cleanup(void)254 static void cleanup(void)
255 {
256 int i;
257
258 for (i = 0; i < ntotal; i++) {
259 if (hwfd[i] != -1)
260 SAFE_CLOSE(hwfd[i]);
261 if (tskfd[i] != -1)
262 SAFE_CLOSE(tskfd[i]);
263 }
264
265 if (tsk0 != -1)
266 SAFE_CLOSE(tsk0);
267 }
268
verify(void)269 static void verify(void)
270 {
271 unsigned long long vt0, vt[MAX_CTRS], vh[MAX_CTRS];
272 unsigned long long vtsum = 0, vhsum = 0;
273 int i;
274 double ratio;
275 struct sched_param sparam = {.sched_priority = 1};
276
277 if (sched_setscheduler(0, SCHED_FIFO, &sparam)) {
278 tst_brk(TBROK | TERRNO,
279 "sched_setscheduler(0, SCHED_FIFO, ...) failed");
280 }
281
282 all_counters_set(PR_TASK_PERF_EVENTS_ENABLE);
283 do_work(8);
284 /* stop groups with hw counters first before tsk0 */
285 for (i = 0; i < ntotal; i++) {
286 ioctl(hwfd[i], PERF_EVENT_IOC_DISABLE);
287 ioctl(tskfd[i], PERF_EVENT_IOC_DISABLE);
288 }
289 all_counters_set(PR_TASK_PERF_EVENTS_DISABLE);
290
291 sparam.sched_priority = 0;
292 if (sched_setscheduler(0, SCHED_OTHER, &sparam)) {
293 tst_brk(TBROK | TERRNO,
294 "sched_setscheduler(0, SCHED_OTHER, ...) failed");
295 }
296
297 if (read(tsk0, &vt0, sizeof(vt0)) != sizeof(vt0))
298 tst_brk(TBROK | TERRNO, "error reading task clock counter");
299
300 for (i = 0; i < ntotal; ++i) {
301 if (read(tskfd[i], &vt[i], sizeof(vt[i])) != sizeof(vt[i]) ||
302 read(hwfd[i], &vh[i], sizeof(vh[i])) != sizeof(vh[i]))
303 tst_brk(TBROK | TERRNO, "error reading counter(s)");
304 vtsum += vt[i];
305 vhsum += vh[i];
306 }
307
308 tst_res(TINFO, "nhw: %d, overall task clock: %llu", nhw, vt0);
309 tst_res(TINFO, "hw sum: %llu, task clock sum: %llu", vhsum, vtsum);
310
311 if (verbose) {
312 tst_res(TINFO, "hw counters:");
313 for (i = 0; i < ntotal; ++i)
314 tst_res(TINFO, " %llu", vh[i]);
315 tst_res(TINFO, "task clock counters:");
316 for (i = 0; i < ntotal; ++i)
317 tst_res(TINFO, " %llu", vt[i]);
318 }
319
320 ratio = (double)vtsum / vt0;
321 tst_res(TINFO, "ratio: %lf", ratio);
322 if (ratio > nhw + 0.0001) {
323 tst_res(TFAIL, "test failed (ratio was greater than %d)", nhw);
324 } else {
325 tst_res(TPASS, "test passed");
326 }
327 }
328
329 static struct tst_test test = {
330 .setup = setup,
331 .cleanup = cleanup,
332 .options = (struct tst_option[]) {
333 {"v", &verbose, "Verbose output"},
334 {},
335 },
336 .test_all = verify,
337 .needs_root = 1,
338 };
339
340 #else /* HAVE_PERF_EVENT_ATTR */
341 TST_TEST_TCONF("This system doesn't have <linux/perf_event.h> or "
342 "struct perf_event_attr is not defined.");
343 #endif
344