1 /*
2 * Copyright (c) 2013 Ben Noordhuis <info@bnoordhuis.nl>
3 * Copyright (c) 2013-2015 Dmitry V. Levin <ldv@altlinux.org>
4 * Copyright (c) 2016 Eugene Syromyatnikov <evgsyr@gmail.com>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #include "defs.h"
31
32 #include "perf_event_struct.h"
33
34 #include "xlat/clocknames.h"
35 #include "xlat/hw_breakpoint_len.h"
36 #include "xlat/hw_breakpoint_type.h"
37 #include "xlat/perf_attr_size.h"
38 #include "xlat/perf_branch_sample_type.h"
39 #include "xlat/perf_event_open_flags.h"
40 #include "xlat/perf_event_read_format.h"
41 #include "xlat/perf_event_sample_format.h"
42 #include "xlat/perf_hw_cache_id.h"
43 #include "xlat/perf_hw_cache_op_id.h"
44 #include "xlat/perf_hw_cache_op_result_id.h"
45 #include "xlat/perf_hw_id.h"
46 #include "xlat/perf_sw_ids.h"
47 #include "xlat/perf_type_id.h"
48
/*
 * Per-syscall private data handed from the entering stage of
 * perf_event_open decoding to the exiting stage.
 */
struct pea_desc {
	/* Heap-allocated local copy of the tracee's attribute structure. */
	struct perf_event_attr *attr;
	/* Number of bytes of *attr that were actually fetched from the tracee. */
	uint32_t size;
};
53
54 static void
free_pea_desc(void * pea_desc_ptr)55 free_pea_desc(void *pea_desc_ptr)
56 {
57 struct pea_desc *desc = pea_desc_ptr;
58
59 free(desc->attr);
60 free(desc);
61 }
62
63 static int
fetch_perf_event_attr(struct tcb * const tcp,const kernel_ulong_t addr)64 fetch_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr)
65 {
66 struct pea_desc *desc;
67 struct perf_event_attr *attr;
68 uint32_t size;
69
70 if (umove(tcp, addr + offsetof(struct perf_event_attr, size), &size)) {
71 printaddr(addr);
72 return 1;
73 }
74
75 if (size > sizeof(*attr))
76 size = sizeof(*attr);
77
78 if (!size)
79 size = PERF_ATTR_SIZE_VER0;
80
81 /*
82 * Kernel (rightfully) deems invalid attribute structures with size less
83 * than first published format size, and we do the same.
84 */
85 if (size < PERF_ATTR_SIZE_VER0) {
86 printaddr(addr);
87 return 1;
88 }
89
90 if (abbrev(tcp))
91 size = offsetofend(struct perf_event_attr, config);
92
93 /* Size should be multiple of 8, but kernel doesn't check for it */
94 /* size &= ~7; */
95
96 attr = xcalloc(1, sizeof(*attr));
97
98 if (umoven_or_printaddr(tcp, addr, size, attr)) {
99 free(attr);
100
101 return 1;
102 }
103
104 desc = xmalloc(sizeof(*desc));
105
106 desc->attr = attr;
107 desc->size = size;
108
109 set_tcb_priv_data(tcp, desc, free_pea_desc);
110
111 return 0;
112 }
113
/*
 * Print the literal string prefix_ via tprints(), then hand xlat_, val_ and
 * dflt_ over to printxval_search().
 */
#define PRINT_XLAT(prefix_, xlat_, val_, dflt_)		\
	do {						\
		tprints(prefix_);			\
		printxval_search(xlat_, val_, dflt_);	\
	} while (0)
119
120 static void
print_perf_event_attr(struct tcb * const tcp,const kernel_ulong_t addr)121 print_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr)
122 {
123 static const char *precise_ip_desc[] = {
124 "arbitrary skid",
125 "constant skid",
126 "requested to have 0 skid",
127 "must have 0 skid",
128 };
129
130 struct pea_desc *desc;
131 struct perf_event_attr *attr;
132 uint32_t size;
133 uint32_t new_size;
134 int use_new_size = 0;
135
136 /*
137 * Amusingly, kernel accepts structures with only part of the field
138 * present, so we making check like this (instead of checking
139 * offsetofend against size) in order to print fields as kernel sees
140 * them. This also should work great on big endian architectures.
141 */
142 #define _PERF_CHECK_FIELD(_field) \
143 do { \
144 if (offsetof(struct perf_event_attr, _field) >= size) \
145 goto print_perf_event_attr_out; \
146 } while (0)
147
148 desc = get_tcb_priv_data(tcp);
149
150 attr = desc->attr;
151 size = desc->size;
152
153 /* The only error which expected to change size field currently */
154 if (tcp->u_error == E2BIG) {
155 if (umove(tcp, addr + offsetof(struct perf_event_attr, size),
156 &new_size))
157 use_new_size = -1;
158 else
159 use_new_size = 1;
160 }
161
162 PRINT_XLAT("{type=", perf_type_id, attr->type, "PERF_TYPE_???");
163 tprintf(", size=");
164 printxval(perf_attr_size, attr->size, "PERF_ATTR_SIZE_???");
165
166 if (use_new_size) {
167 tprints(" => ");
168
169 if (use_new_size > 0)
170 printxval(perf_attr_size, new_size,
171 "PERF_ATTR_SIZE_???");
172 else
173 tprints("???");
174 }
175
176 switch (attr->type) {
177 case PERF_TYPE_HARDWARE:
178 PRINT_XLAT(", config=", perf_hw_id, attr->config,
179 "PERF_COUNT_HW_???");
180 break;
181 case PERF_TYPE_SOFTWARE:
182 PRINT_XLAT(", config=", perf_sw_ids, attr->config,
183 "PERF_COUNT_SW_???");
184 break;
185 case PERF_TYPE_TRACEPOINT:
186 /*
187 * "The value to use in config can be obtained from under
188 * debugfs tracing/events/../../id if ftrace is enabled in the
189 * kernel."
190 */
191 tprintf(", config=%" PRIu64, attr->config);
192 break;
193 case PERF_TYPE_HW_CACHE:
194 /*
195 * (perf_hw_cache_id) | (perf_hw_cache_op_id << 8) |
196 * (perf_hw_cache_op_result_id << 16)
197 */
198 PRINT_XLAT(", config=", perf_hw_cache_id, attr->config & 0xFF,
199 "PERF_COUNT_HW_CACHE_???");
200 PRINT_XLAT("|", perf_hw_cache_op_id, (attr->config >> 8) & 0xFF,
201 "PERF_COUNT_HW_CACHE_OP_???");
202 /*
203 * Current code (see set_ext_hw_attr in arch/x86/events/core.c,
204 * tile_map_cache_event in arch/tile/kernel/perf_event.c,
205 * arc_pmu_cache_event in arch/arc/kernel/perf_event.c,
206 * hw_perf_cache_event in arch/blackfin/kernel/perf_event.c,
207 * _hw_perf_cache_event in arch/metag/kernel/perf/perf_event.c,
208 * mipspmu_map_cache_event in arch/mips/kernel/perf_event_mipsxx.c,
209 * hw_perf_cache_event in arch/powerpc/perf/core-book3s.c,
210 * hw_perf_cache_event in arch/powerpc/perf/core-fsl-emb.c,
211 * hw_perf_cache_event in arch/sh/kernel/perf_event.c,
212 * sparc_map_cache_event in arch/sparc/kernel/perf_event.c,
213 * xtensa_pmu_cache_event in arch/xtensa/kernel/perf_event.c,
214 * armpmu_map_cache_event in drivers/perf/arm_pmu.c) assumes
215 * that cache result is 8 bits in size.
216 */
217 PRINT_XLAT("<<8|", perf_hw_cache_op_result_id,
218 (attr->config >> 16) & 0xFF,
219 "PERF_COUNT_HW_CACHE_RESULT_???");
220 tprintf("<<16");
221 if (attr->config >> 24)
222 tprintf("|%#" PRIx64 "<<24 "
223 "/* PERF_COUNT_HW_CACHE_??? */",
224 attr->config >> 24);
225 break;
226 case PERF_TYPE_RAW:
227 /*
228 * "If type is PERF_TYPE_RAW, then a custom "raw" config
229 * value is needed. Most CPUs support events that are not
230 * covered by the "generalized" events. These are
231 * implementation defined; see your CPU manual (for example the
232 * Intel Volume 3B documentation or the AMD BIOS and Kernel
233 * Developer Guide). The libpfm4 library can be used to
234 * translate from the name in the architectural manuals
235 * to the raw hex value perf_event_open() expects in this
236 * field."
237 */
238 case PERF_TYPE_BREAKPOINT:
239 /*
240 * "If type is PERF_TYPE_BREAKPOINT, then leave config set
241 * to zero. Its parameters are set in other places."
242 */
243 default:
244 tprintf(", config=%#" PRIx64, attr->config);
245 break;
246 }
247
248 if (abbrev(tcp))
249 goto print_perf_event_attr_out;
250
251 if (attr->freq)
252 tprintf(", sample_freq=%" PRIu64, attr->sample_freq);
253 else
254 tprintf(", sample_period=%" PRIu64, attr->sample_period);
255
256 tprintf(", sample_type=");
257 printflags64(perf_event_sample_format, attr->sample_type,
258 "PERF_SAMPLE_???");
259
260 tprintf(", read_format=");
261 printflags64(perf_event_read_format, attr->read_format,
262 "PERF_FORMAT_???");
263
264 tprintf(", disabled=%u"
265 ", inherit=%u"
266 ", pinned=%u"
267 ", exclusive=%u"
268 ", exclusive_user=%u"
269 ", exclude_kernel=%u"
270 ", exclude_hv=%u"
271 ", exclude_idle=%u"
272 ", mmap=%u"
273 ", comm=%u"
274 ", freq=%u"
275 ", inherit_stat=%u"
276 ", enable_on_exec=%u"
277 ", task=%u"
278 ", watermark=%u"
279 ", precise_ip=%u /* %s */"
280 ", mmap_data=%u"
281 ", sample_id_all=%u"
282 ", exclude_host=%u"
283 ", exclude_guest=%u"
284 ", exclude_callchain_kernel=%u"
285 ", exclude_callchain_user=%u"
286 ", mmap2=%u"
287 ", comm_exec=%u"
288 ", use_clockid=%u"
289 ", context_switch=%u"
290 ", write_backward=%u",
291 attr->disabled,
292 attr->inherit,
293 attr->pinned,
294 attr->exclusive,
295 attr->exclude_user,
296 attr->exclude_kernel,
297 attr->exclude_hv,
298 attr->exclude_idle,
299 attr->mmap,
300 attr->comm,
301 attr->freq,
302 attr->inherit_stat,
303 attr->enable_on_exec,
304 attr->task,
305 attr->watermark,
306 attr->precise_ip, precise_ip_desc[attr->precise_ip],
307 attr->mmap_data,
308 attr->sample_id_all,
309 attr->exclude_host,
310 attr->exclude_guest,
311 attr->exclude_callchain_kernel,
312 attr->exclude_callchain_user,
313 attr->mmap2,
314 attr->comm_exec,
315 attr->use_clockid,
316 attr->context_switch,
317 attr->write_backward);
318
319 /*
320 * Print it only in case it is non-zero, since it may contain flags we
321 * are not aware about.
322 */
323 if (attr->__reserved_1)
324 tprintf(", __reserved_1=%#" PRIx64 " /* Bits 63..28 */",
325 (uint64_t) attr->__reserved_1);
326
327 if (attr->watermark)
328 tprintf(", wakeup_watermark=%u", attr->wakeup_watermark);
329 else
330 tprintf(", wakeup_events=%u", attr->wakeup_events);
331
332 if (attr->type == PERF_TYPE_BREAKPOINT)
333 /* Any combination of R/W with X is deemed invalid */
334 PRINT_XLAT(", bp_type=", hw_breakpoint_type, attr->bp_type,
335 (attr->bp_type <=
336 (HW_BREAKPOINT_X | HW_BREAKPOINT_RW)) ?
337 "HW_BREAKPOINT_INVALID" :
338 "HW_BREAKPOINT_???");
339
340 if (attr->type == PERF_TYPE_BREAKPOINT)
341 tprintf(", bp_addr=%#" PRIx64, attr->bp_addr);
342 else
343 tprintf(", config1=%#" PRIx64, attr->config1);
344
345 /*
346 * Fields after bp_addr/config1 are optional and may not present; check
347 * against size is needed.
348 */
349
350 _PERF_CHECK_FIELD(bp_len);
351 if (attr->type == PERF_TYPE_BREAKPOINT)
352 tprintf(", bp_len=%" PRIu64, attr->bp_len);
353 else
354 tprintf(", config2=%#" PRIx64, attr->config2);
355
356 _PERF_CHECK_FIELD(branch_sample_type);
357 if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
358 tprintf(", branch_sample_type=");
359 printflags64(perf_branch_sample_type, attr->branch_sample_type,
360 "PERF_SAMPLE_BRANCH_???");
361 }
362
363 _PERF_CHECK_FIELD(sample_regs_user);
364 /*
365 * "This bit mask defines the set of user CPU registers to dump on
366 * samples. The layout of the register mask is architecture-specific and
367 * described in the kernel header
368 * arch/ARCH/include/uapi/asm/perf_regs.h."
369 */
370 tprintf(", sample_regs_user=%#" PRIx64, attr->sample_regs_user);
371
372 _PERF_CHECK_FIELD(sample_stack_user);
373 /*
374 * "size of the user stack to dump if PERF_SAMPLE_STACK_USER is
375 * specified."
376 */
377 if (attr->sample_type & PERF_SAMPLE_STACK_USER)
378 tprintf(", sample_stack_user=%#" PRIx32,
379 attr->sample_stack_user);
380
381 if (attr->use_clockid) {
382 _PERF_CHECK_FIELD(clockid);
383 tprintf(", clockid=");
384 printxval(clocknames, attr->clockid, "CLOCK_???");
385 }
386
387 _PERF_CHECK_FIELD(sample_regs_intr);
388 tprintf(", sample_regs_intr=%#" PRIx64, attr->sample_regs_intr);
389
390 _PERF_CHECK_FIELD(aux_watermark);
391 tprintf(", aux_watermark=%" PRIu32, attr->aux_watermark);
392
393 _PERF_CHECK_FIELD(sample_max_stack);
394 tprintf(", sample_max_stack=%" PRIu16, attr->sample_max_stack);
395
396 /* _PERF_CHECK_FIELD(__reserved_2);
397 tprintf(", __reserved2=%" PRIu16, attr->__reserved_2); */
398
399 print_perf_event_attr_out:
400 if ((attr->size && (attr->size > size)) ||
401 (!attr->size && (size < PERF_ATTR_SIZE_VER0)))
402 tprintf(", ...");
403
404 tprintf("}");
405 }
406
SYS_FUNC(perf_event_open)407 SYS_FUNC(perf_event_open)
408 {
409 /*
410 * We try to copy out the whole structure on entering in order to check
411 * size value on exiting. We do not check the rest of the fields because
412 * they shouldn't be changed, but copy the whole structure instead
413 * of just size field because they could.
414 */
415 if (entering(tcp)) {
416 if (!fetch_perf_event_attr(tcp, tcp->u_arg[0]))
417 return 0;
418 } else {
419 print_perf_event_attr(tcp, tcp->u_arg[0]);
420 }
421
422 tprintf(", %d, %d, %d, ",
423 (int) tcp->u_arg[1],
424 (int) tcp->u_arg[2],
425 (int) tcp->u_arg[3]);
426 printflags64(perf_event_open_flags, tcp->u_arg[4], "PERF_FLAG_???");
427
428 return RVAL_DECODED | RVAL_FD;
429 }
430