1 /*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include "util/exec_cmd.h"
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include "util/parse-options.h"
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36
37 #include <libaudit.h>
38 #include <stdlib.h>
39 #include <sys/mman.h>
40 #include <linux/futex.h>
41 #include <linux/err.h>
42
/*
 * For older distros: fallback values for constants that the installed system
 * headers may not provide.  Every definition is guarded by #ifndef, so a value
 * coming from the headers always wins.
 */

/* Used by the mmap flags beautifier (P_MMAP_FLAG(STACK)). */
#ifndef MAP_STACK
# define MAP_STACK 0x20000
#endif

/* Used by the madvise behavior beautifier. */
#ifndef MADV_HWPOISON
# define MADV_HWPOISON 100

#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE 12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE 13
#endif

/* Used by the eventfd flags beautifier. */
#ifndef EFD_SEMAPHORE
# define EFD_SEMAPHORE 1
#endif

#ifndef EFD_NONBLOCK
# define EFD_NONBLOCK 00004000
#endif

#ifndef EFD_CLOEXEC
# define EFD_CLOEXEC 02000000
#endif

/* Used by the open and pipe flags beautifiers. */
#ifndef O_CLOEXEC
# define O_CLOEXEC 02000000
#endif

/* Used by the socket type beautifier. */
#ifndef SOCK_DCCP
# define SOCK_DCCP 6
#endif

#ifndef SOCK_CLOEXEC
# define SOCK_CLOEXEC 02000000
#endif

#ifndef SOCK_NONBLOCK
# define SOCK_NONBLOCK 00004000
#endif

/* Used by the msg flags beautifier. */
#ifndef MSG_CMSG_CLOEXEC
# define MSG_CMSG_CLOEXEC 0x40000000
#endif

/* Used by the perf flags beautifier. */
#ifndef PERF_FLAG_FD_NO_GROUP
# define PERF_FLAG_FD_NO_GROUP (1UL << 0)
#endif

#ifndef PERF_FLAG_FD_OUTPUT
# define PERF_FLAG_FD_OUTPUT (1UL << 1)
#endif

#ifndef PERF_FLAG_PID_CGROUP
# define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#ifndef PERF_FLAG_FD_CLOEXEC
# define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
#endif
108
109
/*
 * Accessor for one field of a tracepoint's raw payload.
 *
 * @offset:  byte offset of the field inside sample->raw_data.
 * @integer: reader returning the field widened to u64; installed by
 *           tp_field__init_uint().
 * @pointer: reader returning the address of the field inside the raw data;
 *           installed by tp_field__init_ptr().
 *
 * The two readers live in an anonymous union, so they share storage: a given
 * field holds either an integer reader or a pointer reader.
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
117
118 #define TP_UINT_FIELD(bits) \
119 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
120 { \
121 u##bits value; \
122 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
123 return value; \
124 }
125
126 TP_UINT_FIELD(8);
127 TP_UINT_FIELD(16);
128 TP_UINT_FIELD(32);
129 TP_UINT_FIELD(64);
130
131 #define TP_UINT_FIELD__SWAPPED(bits) \
132 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
133 { \
134 u##bits value; \
135 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
136 return bswap_##bits(value);\
137 }
138
139 TP_UINT_FIELD__SWAPPED(16);
140 TP_UINT_FIELD__SWAPPED(32);
141 TP_UINT_FIELD__SWAPPED(64);
142
tp_field__init_uint(struct tp_field * field,struct format_field * format_field,bool needs_swap)143 static int tp_field__init_uint(struct tp_field *field,
144 struct format_field *format_field,
145 bool needs_swap)
146 {
147 field->offset = format_field->offset;
148
149 switch (format_field->size) {
150 case 1:
151 field->integer = tp_field__u8;
152 break;
153 case 2:
154 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
155 break;
156 case 4:
157 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
158 break;
159 case 8:
160 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
161 break;
162 default:
163 return -1;
164 }
165
166 return 0;
167 }
168
tp_field__ptr(struct tp_field * field,struct perf_sample * sample)169 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
170 {
171 return sample->raw_data + field->offset;
172 }
173
tp_field__init_ptr(struct tp_field * field,struct format_field * format_field)174 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
175 {
176 field->offset = format_field->offset;
177 field->pointer = tp_field__ptr;
178 return 0;
179 }
180
/*
 * Per-evsel private data (stored in evsel->priv) for the syscall tracepoints.
 *
 * @id:   accessor for the "id" tracepoint field, initialised by
 *        perf_evsel__init_syscall_tp().
 * @args: accessor sharing storage with @ret.
 * @ret:  accessor sharing storage with @args.
 *
 * NOTE(review): presumably @args is used on the enter tracepoint and @ret on
 * the exit one, which is why they can overlap -- confirm against the callers.
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};
187
perf_evsel__init_tp_uint_field(struct perf_evsel * evsel,struct tp_field * field,const char * name)188 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
189 struct tp_field *field,
190 const char *name)
191 {
192 struct format_field *format_field = perf_evsel__field(evsel, name);
193
194 if (format_field == NULL)
195 return -1;
196
197 return tp_field__init_uint(field, format_field, evsel->needs_swap);
198 }
199
/*
 * Initialise the integer accessor for member @name of the struct syscall_tp
 * hanging off evsel->priv.  @name is used both as the struct member and, in
 * stringified form, as the tracepoint field name.  Evaluates to the return
 * value of perf_evsel__init_tp_uint_field().
 *
 * @evsel is parenthesized so that any pointer expression can be passed; note
 * it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
203
/*
 * Look up the tracepoint field called @name on @evsel and initialise @field
 * as a pointer accessor for it.
 *
 * Returns -1 if the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt != NULL ? tp_field__init_ptr(field, fmt) : -1;
}
215
/*
 * Initialise the pointer accessor for member @name of the struct syscall_tp
 * hanging off evsel->priv.  @name is used both as the struct member and, in
 * stringified form, as the tracepoint field name.  Evaluates to the return
 * value of perf_evsel__init_tp_ptr_field().
 *
 * @evsel is parenthesized so that any pointer expression can be passed; note
 * it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
219
/*
 * Destroy an evsel whose ->priv was allocated by this file (see
 * perf_evsel__init_syscall_tp(), which mallocs it): release ->priv first,
 * then delete the evsel itself.
 */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	zfree(&evsel->priv);
	perf_evsel__delete(evsel);
}
225
perf_evsel__init_syscall_tp(struct perf_evsel * evsel,void * handler)226 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
227 {
228 evsel->priv = malloc(sizeof(struct syscall_tp));
229 if (evsel->priv != NULL) {
230 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
231 goto out_delete;
232
233 evsel->handler = handler;
234 return 0;
235 }
236
237 return -ENOMEM;
238
239 out_delete:
240 zfree(&evsel->priv);
241 return -ENOENT;
242 }
243
/*
 * Create the evsel for the syscall tracepoint named @direction and prepare it
 * with perf_evsel__init_syscall_tp().
 *
 * The "raw_syscalls" subsystem is tried first, then "syscalls".
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the initialisation failed (the evsel is destroyed in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (IS_ERR(evsel)) {
		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
		evsel = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler) == 0)
		return evsel;

	perf_evsel__delete_priv(evsel);
	return NULL;
}
264
/*
 * Read member @name of the evsel's struct syscall_tp as an integer from
 * @sample, by calling the reader installed in that member.
 *
 * @evsel is parenthesized so that any pointer expression can be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })
268
/*
 * Get a pointer to member @name of the evsel's struct syscall_tp inside
 * @sample's raw data, by calling the reader installed in that member.
 *
 * @evsel is parenthesized so that any pointer expression can be passed.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
272
/*
 * Context handed to every syscall_arg__scnprintf_*() formatter.
 *
 * @val:    the raw argument value to format.
 * @thread: not used by the formatters visible in this part of the file.
 * @trace:  not used by the formatters visible in this part of the file.
 * @parm:   formatter specific data; the strarray formatters read it as a
 *          struct strarray pointer.
 * @idx:    position of this argument in the syscall's argument list (the
 *          open flags formatter uses idx + 1 to refer to the next one).
 * @mask:   bitmask that formatters OR bits into, bit N standing for argument
 *          N.  NOTE(review): presumably masked arguments are not printed by
 *          the caller -- the consumer is not visible here.
 */
struct syscall_arg {
	unsigned long val;
	struct thread *thread;
	struct trace *trace;
	void *parm;
	u8 idx;
	u8 mask;
};
281
/*
 * A table translating small integer values into names.
 *
 * @offset:     value that maps to entries[0]; it is subtracted from the
 *              argument value to obtain the index.
 * @nr_entries: number of slots in @entries.
 * @entries:    the names, indexed by (value - offset).
 */
struct strarray {
	int offset;
	int nr_entries;
	const char **entries;
};
287
/*
 * Define 'struct strarray strarray__<array>' wrapping the string table
 * <array>.  .offset is not named in the initializer and is therefore zero.
 * <array> must be a real array (not a pointer) for ARRAY_SIZE() to work.
 */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/*
 * Same as DEFINE_STRARRAY(), but entries[0] corresponds to the value 'off'
 * instead of 0.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
298
__syscall_arg__scnprintf_strarray(char * bf,size_t size,const char * intfmt,struct syscall_arg * arg)299 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
300 const char *intfmt,
301 struct syscall_arg *arg)
302 {
303 struct strarray *sa = arg->parm;
304 int idx = arg->val - sa->offset;
305
306 if (idx < 0 || idx >= sa->nr_entries)
307 return scnprintf(bf, size, intfmt, arg->val);
308
309 return scnprintf(bf, size, "%s", sa->entries[idx]);
310 }
311
/*
 * strarray formatter whose fallback for values without a name is decimal.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
319
#if defined(__i386__) || defined(__x86_64__)
/*
 * strarray formatter whose fallback for values without a name is "%#x" hex.
 *
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * 	  gets rewritten to support all arches.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
333
/*
 * Forward declaration: syscall_arg__scnprintf_fd_at() and the syscall_fmts[]
 * table reference this formatter before its definition.
 */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
					struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
338
syscall_arg__scnprintf_fd_at(char * bf,size_t size,struct syscall_arg * arg)339 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
340 struct syscall_arg *arg)
341 {
342 int fd = arg->val;
343
344 if (fd == AT_FDCWD)
345 return scnprintf(bf, size, "CWD");
346
347 return syscall_arg__scnprintf_fd(bf, size, arg);
348 }
349
350 #define SCA_FDAT syscall_arg__scnprintf_fd_at
351
/*
 * Forward declaration: the syscall_fmts[] table references this formatter
 * (for the fd argument of "close") before its definition.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
356
syscall_arg__scnprintf_hex(char * bf,size_t size,struct syscall_arg * arg)357 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
358 struct syscall_arg *arg)
359 {
360 return scnprintf(bf, size, "%#lx", arg->val);
361 }
362
363 #define SCA_HEX syscall_arg__scnprintf_hex
364
syscall_arg__scnprintf_int(char * bf,size_t size,struct syscall_arg * arg)365 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
366 struct syscall_arg *arg)
367 {
368 return scnprintf(bf, size, "%d", arg->val);
369 }
370
371 #define SCA_INT syscall_arg__scnprintf_int
372
syscall_arg__scnprintf_mmap_prot(char * bf,size_t size,struct syscall_arg * arg)373 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
374 struct syscall_arg *arg)
375 {
376 int printed = 0, prot = arg->val;
377
378 if (prot == PROT_NONE)
379 return scnprintf(bf, size, "NONE");
380 #define P_MMAP_PROT(n) \
381 if (prot & PROT_##n) { \
382 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
383 prot &= ~PROT_##n; \
384 }
385
386 P_MMAP_PROT(EXEC);
387 P_MMAP_PROT(READ);
388 P_MMAP_PROT(WRITE);
389 #ifdef PROT_SEM
390 P_MMAP_PROT(SEM);
391 #endif
392 P_MMAP_PROT(GROWSDOWN);
393 P_MMAP_PROT(GROWSUP);
394 #undef P_MMAP_PROT
395
396 if (prot)
397 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
398
399 return printed;
400 }
401
402 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
403
syscall_arg__scnprintf_mmap_flags(char * bf,size_t size,struct syscall_arg * arg)404 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
405 struct syscall_arg *arg)
406 {
407 int printed = 0, flags = arg->val;
408
409 #define P_MMAP_FLAG(n) \
410 if (flags & MAP_##n) { \
411 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
412 flags &= ~MAP_##n; \
413 }
414
415 P_MMAP_FLAG(SHARED);
416 P_MMAP_FLAG(PRIVATE);
417 #ifdef MAP_32BIT
418 P_MMAP_FLAG(32BIT);
419 #endif
420 P_MMAP_FLAG(ANONYMOUS);
421 P_MMAP_FLAG(DENYWRITE);
422 P_MMAP_FLAG(EXECUTABLE);
423 P_MMAP_FLAG(FILE);
424 P_MMAP_FLAG(FIXED);
425 P_MMAP_FLAG(GROWSDOWN);
426 #ifdef MAP_HUGETLB
427 P_MMAP_FLAG(HUGETLB);
428 #endif
429 P_MMAP_FLAG(LOCKED);
430 P_MMAP_FLAG(NONBLOCK);
431 P_MMAP_FLAG(NORESERVE);
432 P_MMAP_FLAG(POPULATE);
433 P_MMAP_FLAG(STACK);
434 #ifdef MAP_UNINITIALIZED
435 P_MMAP_FLAG(UNINITIALIZED);
436 #endif
437 #undef P_MMAP_FLAG
438
439 if (flags)
440 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
441
442 return printed;
443 }
444
445 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
446
syscall_arg__scnprintf_mremap_flags(char * bf,size_t size,struct syscall_arg * arg)447 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
448 struct syscall_arg *arg)
449 {
450 int printed = 0, flags = arg->val;
451
452 #define P_MREMAP_FLAG(n) \
453 if (flags & MREMAP_##n) { \
454 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
455 flags &= ~MREMAP_##n; \
456 }
457
458 P_MREMAP_FLAG(MAYMOVE);
459 #ifdef MREMAP_FIXED
460 P_MREMAP_FLAG(FIXED);
461 #endif
462 #undef P_MREMAP_FLAG
463
464 if (flags)
465 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
466
467 return printed;
468 }
469
470 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
471
syscall_arg__scnprintf_madvise_behavior(char * bf,size_t size,struct syscall_arg * arg)472 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
473 struct syscall_arg *arg)
474 {
475 int behavior = arg->val;
476
477 switch (behavior) {
478 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
479 P_MADV_BHV(NORMAL);
480 P_MADV_BHV(RANDOM);
481 P_MADV_BHV(SEQUENTIAL);
482 P_MADV_BHV(WILLNEED);
483 P_MADV_BHV(DONTNEED);
484 P_MADV_BHV(REMOVE);
485 P_MADV_BHV(DONTFORK);
486 P_MADV_BHV(DOFORK);
487 P_MADV_BHV(HWPOISON);
488 #ifdef MADV_SOFT_OFFLINE
489 P_MADV_BHV(SOFT_OFFLINE);
490 #endif
491 P_MADV_BHV(MERGEABLE);
492 P_MADV_BHV(UNMERGEABLE);
493 #ifdef MADV_HUGEPAGE
494 P_MADV_BHV(HUGEPAGE);
495 #endif
496 #ifdef MADV_NOHUGEPAGE
497 P_MADV_BHV(NOHUGEPAGE);
498 #endif
499 #ifdef MADV_DONTDUMP
500 P_MADV_BHV(DONTDUMP);
501 #endif
502 #ifdef MADV_DODUMP
503 P_MADV_BHV(DODUMP);
504 #endif
505 #undef P_MADV_PHV
506 default: break;
507 }
508
509 return scnprintf(bf, size, "%#x", behavior);
510 }
511
512 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
513
syscall_arg__scnprintf_flock(char * bf,size_t size,struct syscall_arg * arg)514 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
515 struct syscall_arg *arg)
516 {
517 int printed = 0, op = arg->val;
518
519 if (op == 0)
520 return scnprintf(bf, size, "NONE");
521 #define P_CMD(cmd) \
522 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
523 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
524 op &= ~LOCK_##cmd; \
525 }
526
527 P_CMD(SH);
528 P_CMD(EX);
529 P_CMD(NB);
530 P_CMD(UN);
531 P_CMD(MAND);
532 P_CMD(RW);
533 P_CMD(READ);
534 P_CMD(WRITE);
535 #undef P_OP
536
537 if (op)
538 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
539
540 return printed;
541 }
542
543 #define SCA_FLOCK syscall_arg__scnprintf_flock
544
syscall_arg__scnprintf_futex_op(char * bf,size_t size,struct syscall_arg * arg)545 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
546 {
547 enum syscall_futex_args {
548 SCF_UADDR = (1 << 0),
549 SCF_OP = (1 << 1),
550 SCF_VAL = (1 << 2),
551 SCF_TIMEOUT = (1 << 3),
552 SCF_UADDR2 = (1 << 4),
553 SCF_VAL3 = (1 << 5),
554 };
555 int op = arg->val;
556 int cmd = op & FUTEX_CMD_MASK;
557 size_t printed = 0;
558
559 switch (cmd) {
560 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
561 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
562 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
563 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
564 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
565 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
566 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
567 P_FUTEX_OP(WAKE_OP); break;
568 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
569 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
570 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
571 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
572 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
573 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
574 default: printed = scnprintf(bf, size, "%#x", cmd); break;
575 }
576
577 if (op & FUTEX_PRIVATE_FLAG)
578 printed += scnprintf(bf + printed, size - printed, "|PRIV");
579
580 if (op & FUTEX_CLOCK_REALTIME)
581 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
582
583 return printed;
584 }
585
586 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
587
/*
 * Name tables for the strarray formatters.  Each table is indexed by
 * (argument value - offset); see struct strarray.
 */

/* Used for argument 0 of "bpf" in syscall_fmts[]. */
static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);

/* Used for argument 1 of "epoll_ctl"; offset 1, so the value 1 is "ADD". */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

/* Used for argument 0 of "getitimer" in syscall_fmts[]. */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

/* NOTE(review): the user of this table is not visible in this part of the file. */
static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);
608
/*
 * "DATA" and "HOLE" are only present when the headers define SEEK_DATA and
 * SEEK_HOLE respectively.
 * NOTE(review): the user of this table is not visible in this part of the file.
 */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

/* Used for argument 1 (cmd) of "fcntl" in syscall_fmts[]. */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

/* Used for argument 0 of "getrlimit" in syscall_fmts[]. */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* NOTE(review): the user of this table is not visible in this part of the file. */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);
636
/* Used for argument 0 of "clock_gettime" in syscall_fmts[]. */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);

/*
 * Names indexed by family number, starting at 0 ("UNSPEC").
 * NOTE(review): the user of this table is not visible in this part of the file.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
	"ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
653
654 #ifndef SOCK_TYPE_MASK
655 #define SOCK_TYPE_MASK 0xf
656 #endif
657
syscall_arg__scnprintf_socket_type(char * bf,size_t size,struct syscall_arg * arg)658 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
659 struct syscall_arg *arg)
660 {
661 size_t printed;
662 int type = arg->val,
663 flags = type & ~SOCK_TYPE_MASK;
664
665 type &= SOCK_TYPE_MASK;
666 /*
667 * Can't use a strarray, MIPS may override for ABI reasons.
668 */
669 switch (type) {
670 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
671 P_SK_TYPE(STREAM);
672 P_SK_TYPE(DGRAM);
673 P_SK_TYPE(RAW);
674 P_SK_TYPE(RDM);
675 P_SK_TYPE(SEQPACKET);
676 P_SK_TYPE(DCCP);
677 P_SK_TYPE(PACKET);
678 #undef P_SK_TYPE
679 default:
680 printed = scnprintf(bf, size, "%#x", type);
681 }
682
683 #define P_SK_FLAG(n) \
684 if (flags & SOCK_##n) { \
685 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
686 flags &= ~SOCK_##n; \
687 }
688
689 P_SK_FLAG(CLOEXEC);
690 P_SK_FLAG(NONBLOCK);
691 #undef P_SK_FLAG
692
693 if (flags)
694 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
695
696 return printed;
697 }
698
699 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
700
701 #ifndef MSG_PROBE
702 #define MSG_PROBE 0x10
703 #endif
704 #ifndef MSG_WAITFORONE
705 #define MSG_WAITFORONE 0x10000
706 #endif
707 #ifndef MSG_SENDPAGE_NOTLAST
708 #define MSG_SENDPAGE_NOTLAST 0x20000
709 #endif
710 #ifndef MSG_FASTOPEN
711 #define MSG_FASTOPEN 0x20000000
712 #endif
713
syscall_arg__scnprintf_msg_flags(char * bf,size_t size,struct syscall_arg * arg)714 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
715 struct syscall_arg *arg)
716 {
717 int printed = 0, flags = arg->val;
718
719 if (flags == 0)
720 return scnprintf(bf, size, "NONE");
721 #define P_MSG_FLAG(n) \
722 if (flags & MSG_##n) { \
723 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
724 flags &= ~MSG_##n; \
725 }
726
727 P_MSG_FLAG(OOB);
728 P_MSG_FLAG(PEEK);
729 P_MSG_FLAG(DONTROUTE);
730 P_MSG_FLAG(TRYHARD);
731 P_MSG_FLAG(CTRUNC);
732 P_MSG_FLAG(PROBE);
733 P_MSG_FLAG(TRUNC);
734 P_MSG_FLAG(DONTWAIT);
735 P_MSG_FLAG(EOR);
736 P_MSG_FLAG(WAITALL);
737 P_MSG_FLAG(FIN);
738 P_MSG_FLAG(SYN);
739 P_MSG_FLAG(CONFIRM);
740 P_MSG_FLAG(RST);
741 P_MSG_FLAG(ERRQUEUE);
742 P_MSG_FLAG(NOSIGNAL);
743 P_MSG_FLAG(MORE);
744 P_MSG_FLAG(WAITFORONE);
745 P_MSG_FLAG(SENDPAGE_NOTLAST);
746 P_MSG_FLAG(FASTOPEN);
747 P_MSG_FLAG(CMSG_CLOEXEC);
748 #undef P_MSG_FLAG
749
750 if (flags)
751 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
752
753 return printed;
754 }
755
756 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
757
syscall_arg__scnprintf_access_mode(char * bf,size_t size,struct syscall_arg * arg)758 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
759 struct syscall_arg *arg)
760 {
761 size_t printed = 0;
762 int mode = arg->val;
763
764 if (mode == F_OK) /* 0 */
765 return scnprintf(bf, size, "F");
766 #define P_MODE(n) \
767 if (mode & n##_OK) { \
768 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
769 mode &= ~n##_OK; \
770 }
771
772 P_MODE(R);
773 P_MODE(W);
774 P_MODE(X);
775 #undef P_MODE
776
777 if (mode)
778 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
779
780 return printed;
781 }
782
783 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
784
/*
 * Forward declaration: the syscall_fmts[] table references this formatter
 * (for filename/pathname arguments) before its definition.
 */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
789
syscall_arg__scnprintf_open_flags(char * bf,size_t size,struct syscall_arg * arg)790 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
791 struct syscall_arg *arg)
792 {
793 int printed = 0, flags = arg->val;
794
795 if (!(flags & O_CREAT))
796 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
797
798 if (flags == 0)
799 return scnprintf(bf, size, "RDONLY");
800 #define P_FLAG(n) \
801 if (flags & O_##n) { \
802 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
803 flags &= ~O_##n; \
804 }
805
806 P_FLAG(APPEND);
807 P_FLAG(ASYNC);
808 P_FLAG(CLOEXEC);
809 P_FLAG(CREAT);
810 P_FLAG(DIRECT);
811 P_FLAG(DIRECTORY);
812 P_FLAG(EXCL);
813 P_FLAG(LARGEFILE);
814 P_FLAG(NOATIME);
815 P_FLAG(NOCTTY);
816 #ifdef O_NONBLOCK
817 P_FLAG(NONBLOCK);
818 #elif O_NDELAY
819 P_FLAG(NDELAY);
820 #endif
821 #ifdef O_PATH
822 P_FLAG(PATH);
823 #endif
824 P_FLAG(RDWR);
825 #ifdef O_DSYNC
826 if ((flags & O_SYNC) == O_SYNC)
827 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
828 else {
829 P_FLAG(DSYNC);
830 }
831 #else
832 P_FLAG(SYNC);
833 #endif
834 P_FLAG(TRUNC);
835 P_FLAG(WRONLY);
836 #undef P_FLAG
837
838 if (flags)
839 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
840
841 return printed;
842 }
843
844 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
845
syscall_arg__scnprintf_perf_flags(char * bf,size_t size,struct syscall_arg * arg)846 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
847 struct syscall_arg *arg)
848 {
849 int printed = 0, flags = arg->val;
850
851 if (flags == 0)
852 return 0;
853
854 #define P_FLAG(n) \
855 if (flags & PERF_FLAG_##n) { \
856 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
857 flags &= ~PERF_FLAG_##n; \
858 }
859
860 P_FLAG(FD_NO_GROUP);
861 P_FLAG(FD_OUTPUT);
862 P_FLAG(PID_CGROUP);
863 P_FLAG(FD_CLOEXEC);
864 #undef P_FLAG
865
866 if (flags)
867 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
868
869 return printed;
870 }
871
872 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
873
syscall_arg__scnprintf_eventfd_flags(char * bf,size_t size,struct syscall_arg * arg)874 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
875 struct syscall_arg *arg)
876 {
877 int printed = 0, flags = arg->val;
878
879 if (flags == 0)
880 return scnprintf(bf, size, "NONE");
881 #define P_FLAG(n) \
882 if (flags & EFD_##n) { \
883 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
884 flags &= ~EFD_##n; \
885 }
886
887 P_FLAG(SEMAPHORE);
888 P_FLAG(CLOEXEC);
889 P_FLAG(NONBLOCK);
890 #undef P_FLAG
891
892 if (flags)
893 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
894
895 return printed;
896 }
897
898 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
899
syscall_arg__scnprintf_pipe_flags(char * bf,size_t size,struct syscall_arg * arg)900 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
901 struct syscall_arg *arg)
902 {
903 int printed = 0, flags = arg->val;
904
905 #define P_FLAG(n) \
906 if (flags & O_##n) { \
907 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
908 flags &= ~O_##n; \
909 }
910
911 P_FLAG(CLOEXEC);
912 P_FLAG(NONBLOCK);
913 #undef P_FLAG
914
915 if (flags)
916 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
917
918 return printed;
919 }
920
921 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
922
syscall_arg__scnprintf_signum(char * bf,size_t size,struct syscall_arg * arg)923 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
924 {
925 int sig = arg->val;
926
927 switch (sig) {
928 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
929 P_SIGNUM(HUP);
930 P_SIGNUM(INT);
931 P_SIGNUM(QUIT);
932 P_SIGNUM(ILL);
933 P_SIGNUM(TRAP);
934 P_SIGNUM(ABRT);
935 P_SIGNUM(BUS);
936 P_SIGNUM(FPE);
937 P_SIGNUM(KILL);
938 P_SIGNUM(USR1);
939 P_SIGNUM(SEGV);
940 P_SIGNUM(USR2);
941 P_SIGNUM(PIPE);
942 P_SIGNUM(ALRM);
943 P_SIGNUM(TERM);
944 P_SIGNUM(CHLD);
945 P_SIGNUM(CONT);
946 P_SIGNUM(STOP);
947 P_SIGNUM(TSTP);
948 P_SIGNUM(TTIN);
949 P_SIGNUM(TTOU);
950 P_SIGNUM(URG);
951 P_SIGNUM(XCPU);
952 P_SIGNUM(XFSZ);
953 P_SIGNUM(VTALRM);
954 P_SIGNUM(PROF);
955 P_SIGNUM(WINCH);
956 P_SIGNUM(IO);
957 P_SIGNUM(PWR);
958 P_SIGNUM(SYS);
959 #ifdef SIGEMT
960 P_SIGNUM(EMT);
961 #endif
962 #ifdef SIGSTKFLT
963 P_SIGNUM(STKFLT);
964 #endif
965 #ifdef SIGSWI
966 P_SIGNUM(SWI);
967 #endif
968 default: break;
969 }
970
971 return scnprintf(bf, size, "%#x", sig);
972 }
973
974 #define SCA_SIGNUM syscall_arg__scnprintf_signum
975
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
#define TCGETS		0x5401

/*
 * Names of the terminal ioctl requests, indexed by (request - 0x5401), so
 * entry 0 is TCGETS.  The designated initializers ([0x27], [0x50], [0x60])
 * jump ahead in the table; the slots skipped that way are NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
};

/* The offset is spelled as a literal here; it is the same value as TCGETS above. */
static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
#endif /* defined(__i386__) || defined(__x86_64__) */
1002
/*
 * Shorthand for a syscall_fmts[] entry: format argument number 'arg' with
 * SCA_STRARRAY, using strarray__<array> as its table.  The 'name' parameter
 * is not used by the expansion; it only labels the argument at the call site.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
1006
1007 static struct syscall_fmt {
1008 const char *name;
1009 const char *alias;
1010 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1011 void *arg_parm[6];
1012 bool errmsg;
1013 bool timeout;
1014 bool hexret;
1015 } syscall_fmts[] = {
1016 { .name = "access", .errmsg = true,
1017 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1018 [1] = SCA_ACCMODE, /* mode */ }, },
1019 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
1020 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
1021 { .name = "brk", .hexret = true,
1022 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1023 { .name = "chdir", .errmsg = true,
1024 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1025 { .name = "chmod", .errmsg = true,
1026 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1027 { .name = "chroot", .errmsg = true,
1028 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1029 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
1030 { .name = "close", .errmsg = true,
1031 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1032 { .name = "connect", .errmsg = true, },
1033 { .name = "creat", .errmsg = true,
1034 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1035 { .name = "dup", .errmsg = true,
1036 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1037 { .name = "dup2", .errmsg = true,
1038 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1039 { .name = "dup3", .errmsg = true,
1040 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1041 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1042 { .name = "eventfd2", .errmsg = true,
1043 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1044 { .name = "faccessat", .errmsg = true,
1045 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1046 [1] = SCA_FILENAME, /* filename */ }, },
1047 { .name = "fadvise64", .errmsg = true,
1048 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1049 { .name = "fallocate", .errmsg = true,
1050 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1051 { .name = "fchdir", .errmsg = true,
1052 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1053 { .name = "fchmod", .errmsg = true,
1054 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1055 { .name = "fchmodat", .errmsg = true,
1056 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1057 [1] = SCA_FILENAME, /* filename */ }, },
1058 { .name = "fchown", .errmsg = true,
1059 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1060 { .name = "fchownat", .errmsg = true,
1061 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1062 [1] = SCA_FILENAME, /* filename */ }, },
1063 { .name = "fcntl", .errmsg = true,
1064 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1065 [1] = SCA_STRARRAY, /* cmd */ },
1066 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1067 { .name = "fdatasync", .errmsg = true,
1068 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069 { .name = "flock", .errmsg = true,
1070 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1071 [1] = SCA_FLOCK, /* cmd */ }, },
1072 { .name = "fsetxattr", .errmsg = true,
1073 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1074 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1075 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1076 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1077 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1078 [1] = SCA_FILENAME, /* filename */ }, },
1079 { .name = "fstatfs", .errmsg = true,
1080 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1081 { .name = "fsync", .errmsg = true,
1082 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1083 { .name = "ftruncate", .errmsg = true,
1084 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1085 { .name = "futex", .errmsg = true,
1086 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1087 { .name = "futimesat", .errmsg = true,
1088 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1089 [1] = SCA_FILENAME, /* filename */ }, },
1090 { .name = "getdents", .errmsg = true,
1091 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1092 { .name = "getdents64", .errmsg = true,
1093 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1094 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1095 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1096 { .name = "getxattr", .errmsg = true,
1097 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1098 { .name = "inotify_add_watch", .errmsg = true,
1099 .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1100 { .name = "ioctl", .errmsg = true,
1101 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1102 #if defined(__i386__) || defined(__x86_64__)
1103 /*
1104 * FIXME: Make this available to all arches.
1105 */
1106 [1] = SCA_STRHEXARRAY, /* cmd */
1107 [2] = SCA_HEX, /* arg */ },
1108 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1109 #else
1110 [2] = SCA_HEX, /* arg */ }, },
1111 #endif
1112 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
1113 { .name = "kill", .errmsg = true,
1114 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1115 { .name = "lchown", .errmsg = true,
1116 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1117 { .name = "lgetxattr", .errmsg = true,
1118 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1119 { .name = "linkat", .errmsg = true,
1120 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1121 { .name = "listxattr", .errmsg = true,
1122 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1123 { .name = "llistxattr", .errmsg = true,
1124 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1125 { .name = "lremovexattr", .errmsg = true,
1126 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1127 { .name = "lseek", .errmsg = true,
1128 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1129 [2] = SCA_STRARRAY, /* whence */ },
1130 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1131 { .name = "lsetxattr", .errmsg = true,
1132 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1133 { .name = "lstat", .errmsg = true, .alias = "newlstat",
1134 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1135 { .name = "lsxattr", .errmsg = true,
1136 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1137 { .name = "madvise", .errmsg = true,
1138 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1139 [2] = SCA_MADV_BHV, /* behavior */ }, },
1140 { .name = "mkdir", .errmsg = true,
1141 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1142 { .name = "mkdirat", .errmsg = true,
1143 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1144 [1] = SCA_FILENAME, /* pathname */ }, },
1145 { .name = "mknod", .errmsg = true,
1146 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1147 { .name = "mknodat", .errmsg = true,
1148 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1149 [1] = SCA_FILENAME, /* filename */ }, },
1150 { .name = "mlock", .errmsg = true,
1151 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1152 { .name = "mlockall", .errmsg = true,
1153 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1154 { .name = "mmap", .hexret = true,
1155 /* The standard mmap maps to old_mmap on s390x */
1156 #if defined(__s390x__)
1157 .alias = "old_mmap",
1158 #endif
1159 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1160 [2] = SCA_MMAP_PROT, /* prot */
1161 [3] = SCA_MMAP_FLAGS, /* flags */
1162 [4] = SCA_FD, /* fd */ }, },
1163 { .name = "mprotect", .errmsg = true,
1164 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1165 [2] = SCA_MMAP_PROT, /* prot */ }, },
1166 { .name = "mq_unlink", .errmsg = true,
1167 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1168 { .name = "mremap", .hexret = true,
1169 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1170 [3] = SCA_MREMAP_FLAGS, /* flags */
1171 [4] = SCA_HEX, /* new_addr */ }, },
1172 { .name = "munlock", .errmsg = true,
1173 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1174 { .name = "munmap", .errmsg = true,
1175 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1176 { .name = "name_to_handle_at", .errmsg = true,
1177 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1178 { .name = "newfstatat", .errmsg = true,
1179 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1180 [1] = SCA_FILENAME, /* filename */ }, },
1181 { .name = "open", .errmsg = true,
1182 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1183 [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1184 { .name = "open_by_handle_at", .errmsg = true,
1185 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1186 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1187 { .name = "openat", .errmsg = true,
1188 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1189 [1] = SCA_FILENAME, /* filename */
1190 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1191 { .name = "perf_event_open", .errmsg = true,
1192 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1193 [2] = SCA_INT, /* cpu */
1194 [3] = SCA_FD, /* group_fd */
1195 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1196 { .name = "pipe2", .errmsg = true,
1197 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1198 { .name = "poll", .errmsg = true, .timeout = true, },
1199 { .name = "ppoll", .errmsg = true, .timeout = true, },
1200 { .name = "pread", .errmsg = true, .alias = "pread64",
1201 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1202 { .name = "preadv", .errmsg = true, .alias = "pread",
1203 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1204 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1205 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1206 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1207 { .name = "pwritev", .errmsg = true,
1208 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1209 { .name = "read", .errmsg = true,
1210 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1211 { .name = "readlink", .errmsg = true,
1212 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1213 { .name = "readlinkat", .errmsg = true,
1214 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1215 [1] = SCA_FILENAME, /* pathname */ }, },
1216 { .name = "readv", .errmsg = true,
1217 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1218 { .name = "recvfrom", .errmsg = true,
1219 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1220 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1221 { .name = "recvmmsg", .errmsg = true,
1222 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1223 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1224 { .name = "recvmsg", .errmsg = true,
1225 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1226 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1227 { .name = "removexattr", .errmsg = true,
1228 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1229 { .name = "renameat", .errmsg = true,
1230 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1231 { .name = "rmdir", .errmsg = true,
1232 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1233 { .name = "rt_sigaction", .errmsg = true,
1234 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1235 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1236 { .name = "rt_sigqueueinfo", .errmsg = true,
1237 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1238 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1239 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1240 { .name = "select", .errmsg = true, .timeout = true, },
1241 { .name = "sendmmsg", .errmsg = true,
1242 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1243 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1244 { .name = "sendmsg", .errmsg = true,
1245 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1246 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1247 { .name = "sendto", .errmsg = true,
1248 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1249 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1250 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1251 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1252 { .name = "setxattr", .errmsg = true,
1253 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1254 { .name = "shutdown", .errmsg = true,
1255 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1256 { .name = "socket", .errmsg = true,
1257 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1258 [1] = SCA_SK_TYPE, /* type */ },
1259 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1260 { .name = "socketpair", .errmsg = true,
1261 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1262 [1] = SCA_SK_TYPE, /* type */ },
1263 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1264 { .name = "stat", .errmsg = true, .alias = "newstat",
1265 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1266 { .name = "statfs", .errmsg = true,
1267 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1268 { .name = "swapoff", .errmsg = true,
1269 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1270 { .name = "swapon", .errmsg = true,
1271 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1272 { .name = "symlinkat", .errmsg = true,
1273 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1274 { .name = "tgkill", .errmsg = true,
1275 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1276 { .name = "tkill", .errmsg = true,
1277 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1278 { .name = "truncate", .errmsg = true,
1279 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1280 { .name = "uname", .errmsg = true, .alias = "newuname", },
1281 { .name = "unlinkat", .errmsg = true,
1282 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1283 [1] = SCA_FILENAME, /* pathname */ }, },
1284 { .name = "utime", .errmsg = true,
1285 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1286 { .name = "utimensat", .errmsg = true,
1287 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1288 [1] = SCA_FILENAME, /* filename */ }, },
1289 { .name = "utimes", .errmsg = true,
1290 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1291 { .name = "vmsplice", .errmsg = true,
1292 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1293 { .name = "write", .errmsg = true,
1294 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1295 { .name = "writev", .errmsg = true,
1296 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1297 };
1298
syscall_fmt__cmp(const void * name,const void * fmtp)1299 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1300 {
1301 const struct syscall_fmt *fmt = fmtp;
1302 return strcmp(name, fmt->name);
1303 }
1304
syscall_fmt__find(const char * name)1305 static struct syscall_fmt *syscall_fmt__find(const char *name)
1306 {
1307 const int nmemb = ARRAY_SIZE(syscall_fmts);
1308 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1309 }
1310
/*
 * Per-syscall description, one slot per syscall id in trace->syscalls.table.
 * Slots are filled lazily by trace__read_syscall_info().
 */
struct syscall {
	struct event_format *tp_format;	/* format of the syscalls:sys_enter_<name> tracepoint */
	int		    nr_args;	/* number of fields in ->args (leading "nr" field dropped) */
	struct format_field *args;	/* linked list of argument fields, NULL if there are none */
	const char	    *name;	/* syscall name as returned by audit_syscall_to_name() */
	bool		    is_exit;	/* true for "exit" and "exit_group" */
	struct syscall_fmt  *fmt;	/* syscall_fmts[] entry, NULL when there is none */
	/* per-argument formatters, indexed by argument position; NULL entry means default formatting */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;	/* per-argument formatter parameter, taken from fmt->arg_parm */
};
1321
fprintf_duration(unsigned long t,FILE * fp)1322 static size_t fprintf_duration(unsigned long t, FILE *fp)
1323 {
1324 double duration = (double)t / NSEC_PER_MSEC;
1325 size_t printed = fprintf(fp, "(");
1326
1327 if (duration >= 1.0)
1328 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1329 else if (duration >= 0.01)
1330 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1331 else
1332 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1333 return printed + fprintf(fp, "): ");
1334 }
1335
/**
 * struct thread_trace - per-thread tracing state, stored as the thread's priv
 *
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 */
struct thread_trace {
	u64		  entry_time;	/* timestamp of the last syscall entry seen for this thread */
	u64		  exit_time;
	bool		  entry_pending; /* a syscall entry was formatted but not yet printed */
	unsigned long	  nr_events;	/* bumped on every thread__trace() lookup */
	unsigned long	  pfmaj, pfmin;
	char		  *entry_str;	/* buffer of trace__entry_str_size bytes holding the formatted entry */
	double		  runtime_ms;
	struct {
		unsigned long ptr;
		short int     entry_str_pos;
		bool	      pending_open;
		unsigned int  namelen;
		char	      *name;
	} filename;
	struct {
		int	  max;		/* highest valid index in ->table, -1 while the table is empty */
		char	  **table;	/* fd -> strdup()ed pathname, NULL when unknown */
	} paths;

	struct intlist *syscall_stats;	/* keyed by syscall id, node priv is a struct stats */
};
1363
thread_trace__new(void)1364 static struct thread_trace *thread_trace__new(void)
1365 {
1366 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1367
1368 if (ttrace)
1369 ttrace->paths.max = -1;
1370
1371 ttrace->syscall_stats = intlist__new(NULL);
1372
1373 return ttrace;
1374 }
1375
thread__trace(struct thread * thread,FILE * fp)1376 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1377 {
1378 struct thread_trace *ttrace;
1379
1380 if (thread == NULL)
1381 goto fail;
1382
1383 if (thread__priv(thread) == NULL)
1384 thread__set_priv(thread, thread_trace__new());
1385
1386 if (thread__priv(thread) == NULL)
1387 goto fail;
1388
1389 ttrace = thread__priv(thread);
1390 ++ttrace->nr_events;
1391
1392 return ttrace;
1393 fail:
1394 color_fprintf(fp, PERF_COLOR_RED,
1395 "WARNING: not enough memory, dropping samples!\n");
1396 return NULL;
1397 }
1398
/* Bit flags; presumably combined in trace->trace_pgfaults - TODO confirm. */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size in bytes of the per-thread thread_trace->entry_str buffer. */
static const size_t trace__entry_str_size = 2048;
1403
/*
 * Global state of one 'perf trace' session.
 */
struct trace {
	struct perf_tool	tool;		/* embedded so callbacks can container_of() back to us */
	struct {
		int		machine;	/* machine type passed to the libaudit name/id lookups */
		int		open_id;
	} audit;
	struct {
		int		max;		/* highest valid index in ->table, -1 while empty */
		struct syscall  *table;		/* indexed by syscall id, filled lazily */
		struct {
			struct perf_evsel *sys_enter,
					  *sys_exit;
		}		events;
	} syscalls;
	struct record_opts	opts;
	struct perf_evlist	*evlist;
	struct machine		*host;		/* created by trace__symbols_init() */
	struct thread		*current;	/* thread of the last syscall entry processed */
	u64			base_time;	/* subtracted from sample times when printing timestamps */
	FILE			*output;	/* where trace lines and diagnostics are written */
	unsigned long		nr_events;
	struct strlist		*ev_qualifier;	/* syscall names given by the user */
	struct {
		size_t		nr;
		int		*entries;	/* syscall ids resolved from ->ev_qualifier */
	}			ev_qualifier_ids;
	struct intlist		*tid_list;
	struct intlist		*pid_list;
	struct {
		size_t		nr;
		pid_t		*entries;
	}			filter_pids;
	double			duration_filter; /* in milliseconds, see trace__filter_duration() */
	double			runtime_ms;
	struct {
		u64		vfs_getname,
				proc_getname;	/* number of fd path lookups that had to go to /proc */
	} stats;
	bool			not_ev_qualifier;
	bool			live;		/* when false, fd paths are never read from /proc */
	bool			full_time;
	bool			sched;
	bool			multiple_threads; /* prefix each line with the thread id */
	bool			summary;
	bool			summary_only;
	bool			show_comm;	/* also print the comm before the thread id */
	bool			show_tool_stats;
	bool			trace_syscalls;
	bool			force;
	bool			vfs_getname;	/* filenames are filled in later instead of printed as pointers */
	int			trace_pgfaults;
};
1456
trace__set_fd_pathname(struct thread * thread,int fd,const char * pathname)1457 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1458 {
1459 struct thread_trace *ttrace = thread__priv(thread);
1460
1461 if (fd > ttrace->paths.max) {
1462 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1463
1464 if (npath == NULL)
1465 return -1;
1466
1467 if (ttrace->paths.max != -1) {
1468 memset(npath + ttrace->paths.max + 1, 0,
1469 (fd - ttrace->paths.max) * sizeof(char *));
1470 } else {
1471 memset(npath, 0, (fd + 1) * sizeof(char *));
1472 }
1473
1474 ttrace->paths.table = npath;
1475 ttrace->paths.max = fd;
1476 }
1477
1478 ttrace->paths.table[fd] = strdup(pathname);
1479
1480 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1481 }
1482
thread__read_fd_path(struct thread * thread,int fd)1483 static int thread__read_fd_path(struct thread *thread, int fd)
1484 {
1485 char linkname[PATH_MAX], pathname[PATH_MAX];
1486 struct stat st;
1487 int ret;
1488
1489 if (thread->pid_ == thread->tid) {
1490 scnprintf(linkname, sizeof(linkname),
1491 "/proc/%d/fd/%d", thread->pid_, fd);
1492 } else {
1493 scnprintf(linkname, sizeof(linkname),
1494 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1495 }
1496
1497 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1498 return -1;
1499
1500 ret = readlink(linkname, pathname, sizeof(pathname));
1501
1502 if (ret < 0 || ret > st.st_size)
1503 return -1;
1504
1505 pathname[ret] = '\0';
1506 return trace__set_fd_pathname(thread, fd, pathname);
1507 }
1508
thread__fd_path(struct thread * thread,int fd,struct trace * trace)1509 static const char *thread__fd_path(struct thread *thread, int fd,
1510 struct trace *trace)
1511 {
1512 struct thread_trace *ttrace = thread__priv(thread);
1513
1514 if (ttrace == NULL)
1515 return NULL;
1516
1517 if (fd < 0)
1518 return NULL;
1519
1520 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1521 if (!trace->live)
1522 return NULL;
1523 ++trace->stats.proc_getname;
1524 if (thread__read_fd_path(thread, fd))
1525 return NULL;
1526 }
1527
1528 return ttrace->paths.table[fd];
1529 }
1530
syscall_arg__scnprintf_fd(char * bf,size_t size,struct syscall_arg * arg)1531 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1532 struct syscall_arg *arg)
1533 {
1534 int fd = arg->val;
1535 size_t printed = scnprintf(bf, size, "%d", fd);
1536 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1537
1538 if (path)
1539 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1540
1541 return printed;
1542 }
1543
syscall_arg__scnprintf_close_fd(char * bf,size_t size,struct syscall_arg * arg)1544 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1545 struct syscall_arg *arg)
1546 {
1547 int fd = arg->val;
1548 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1549 struct thread_trace *ttrace = thread__priv(arg->thread);
1550
1551 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1552 zfree(&ttrace->paths.table[fd]);
1553
1554 return printed;
1555 }
1556
thread__set_filename_pos(struct thread * thread,const char * bf,unsigned long ptr)1557 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1558 unsigned long ptr)
1559 {
1560 struct thread_trace *ttrace = thread__priv(thread);
1561
1562 ttrace->filename.ptr = ptr;
1563 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1564 }
1565
syscall_arg__scnprintf_filename(char * bf,size_t size,struct syscall_arg * arg)1566 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1567 struct syscall_arg *arg)
1568 {
1569 unsigned long ptr = arg->val;
1570
1571 if (!arg->trace->vfs_getname)
1572 return scnprintf(bf, size, "%#x", ptr);
1573
1574 thread__set_filename_pos(arg->thread, bf, ptr);
1575 return 0;
1576 }
1577
trace__filter_duration(struct trace * trace,double t)1578 static bool trace__filter_duration(struct trace *trace, double t)
1579 {
1580 return t < (trace->duration_filter * NSEC_PER_MSEC);
1581 }
1582
/*
 * Print @tstamp to @fp as milliseconds elapsed since @trace->base_time, with
 * three decimals and a trailing space. Returns what fprintf() returned.
 */
static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
{
	const u64 elapsed_ns = tstamp - trace->base_time;

	return fprintf(fp, "%10.3f ", (double)elapsed_ns / NSEC_PER_MSEC);
}
1589
/*
 * Flags set asynchronously by sig_handler(). Objects written from a signal
 * handler and read by the interrupted code have to be volatile sig_atomic_t
 * for the accesses to be well defined; sig_atomic_t comes from <signal.h>,
 * the same header that provides SIGINT below.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination and note whether the request came
 * from SIGINT as opposed to any other signal this handler is installed for.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1598
/*
 * Print the common prefix of a trace line to @fp: the timestamp, the
 * duration and, when several threads are being traced, "[comm/]tid ".
 *
 * Returns the accumulated return values of the print calls.
 */
static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
					u64 duration, u64 tstamp, FILE *fp)
{
	size_t printed;

	printed  = trace__fprintf_tstamp(trace, tstamp, fp);
	printed += fprintf_duration(duration, fp);

	if (!trace->multiple_threads)
		return printed;

	if (trace->show_comm)
		printed += fprintf(fp, "%.14s/", thread__comm_str(thread));

	return printed + fprintf(fp, "%d ", thread->tid);
}
1613
trace__process_event(struct trace * trace,struct machine * machine,union perf_event * event,struct perf_sample * sample)1614 static int trace__process_event(struct trace *trace, struct machine *machine,
1615 union perf_event *event, struct perf_sample *sample)
1616 {
1617 int ret = 0;
1618
1619 switch (event->header.type) {
1620 case PERF_RECORD_LOST:
1621 color_fprintf(trace->output, PERF_COLOR_RED,
1622 "LOST %" PRIu64 " events!\n", event->lost.lost);
1623 ret = machine__process_lost_event(machine, event, sample);
1624 break;
1625 default:
1626 ret = machine__process_event(machine, event, sample);
1627 break;
1628 }
1629
1630 return ret;
1631 }
1632
trace__tool_process(struct perf_tool * tool,union perf_event * event,struct perf_sample * sample,struct machine * machine)1633 static int trace__tool_process(struct perf_tool *tool,
1634 union perf_event *event,
1635 struct perf_sample *sample,
1636 struct machine *machine)
1637 {
1638 struct trace *trace = container_of(tool, struct trace, tool);
1639 return trace__process_event(trace, machine, event, sample);
1640 }
1641
trace__symbols_init(struct trace * trace,struct perf_evlist * evlist)1642 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1643 {
1644 int err = symbol__init(NULL);
1645
1646 if (err)
1647 return err;
1648
1649 trace->host = machine__new_host();
1650 if (trace->host == NULL)
1651 return -ENOMEM;
1652
1653 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1654 return -errno;
1655
1656 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1657 evlist->threads, trace__tool_process, false,
1658 trace->opts.proc_map_timeout);
1659 if (err)
1660 symbol__exit();
1661
1662 return err;
1663 }
1664
syscall__set_arg_fmts(struct syscall * sc)1665 static int syscall__set_arg_fmts(struct syscall *sc)
1666 {
1667 struct format_field *field;
1668 int idx = 0;
1669
1670 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1671 if (sc->arg_scnprintf == NULL)
1672 return -1;
1673
1674 if (sc->fmt)
1675 sc->arg_parm = sc->fmt->arg_parm;
1676
1677 for (field = sc->args; field; field = field->next) {
1678 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1679 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1680 else if (field->flags & FIELD_IS_POINTER)
1681 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1682 ++idx;
1683 }
1684
1685 return 0;
1686 }
1687
trace__read_syscall_info(struct trace * trace,int id)1688 static int trace__read_syscall_info(struct trace *trace, int id)
1689 {
1690 char tp_name[128];
1691 struct syscall *sc;
1692 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1693
1694 if (name == NULL)
1695 return -1;
1696
1697 if (id > trace->syscalls.max) {
1698 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1699
1700 if (nsyscalls == NULL)
1701 return -1;
1702
1703 if (trace->syscalls.max != -1) {
1704 memset(nsyscalls + trace->syscalls.max + 1, 0,
1705 (id - trace->syscalls.max) * sizeof(*sc));
1706 } else {
1707 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1708 }
1709
1710 trace->syscalls.table = nsyscalls;
1711 trace->syscalls.max = id;
1712 }
1713
1714 sc = trace->syscalls.table + id;
1715 sc->name = name;
1716
1717 sc->fmt = syscall_fmt__find(sc->name);
1718
1719 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1720 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1721
1722 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1723 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1724 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1725 }
1726
1727 if (IS_ERR(sc->tp_format))
1728 return -1;
1729
1730 sc->args = sc->tp_format->format.fields;
1731 sc->nr_args = sc->tp_format->format.nr_fields;
1732 /* drop nr field - not relevant here; does not exist on older kernels */
1733 if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1734 sc->args = sc->args->next;
1735 --sc->nr_args;
1736 }
1737
1738 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1739
1740 return syscall__set_arg_fmts(sc);
1741 }
1742
trace__validate_ev_qualifier(struct trace * trace)1743 static int trace__validate_ev_qualifier(struct trace *trace)
1744 {
1745 int err = 0, i;
1746 struct str_node *pos;
1747
1748 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1749 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1750 sizeof(trace->ev_qualifier_ids.entries[0]));
1751
1752 if (trace->ev_qualifier_ids.entries == NULL) {
1753 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1754 trace->output);
1755 err = -EINVAL;
1756 goto out;
1757 }
1758
1759 i = 0;
1760
1761 strlist__for_each(pos, trace->ev_qualifier) {
1762 const char *sc = pos->s;
1763 int id = audit_name_to_syscall(sc, trace->audit.machine);
1764
1765 if (id < 0) {
1766 if (err == 0) {
1767 fputs("Error:\tInvalid syscall ", trace->output);
1768 err = -EINVAL;
1769 } else {
1770 fputs(", ", trace->output);
1771 }
1772
1773 fputs(sc, trace->output);
1774 }
1775
1776 trace->ev_qualifier_ids.entries[i++] = id;
1777 }
1778
1779 if (err < 0) {
1780 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1781 "\nHint:\tand: 'man syscalls'\n", trace->output);
1782 zfree(&trace->ev_qualifier_ids.entries);
1783 trace->ev_qualifier_ids.nr = 0;
1784 }
1785 out:
1786 return err;
1787 }
1788
/*
 * Format the arguments of one syscall entry into @bf (at most @size bytes)
 * as a comma separated list and return the number of characters written.
 *
 * args is to be interpreted as a series of longs but we need to handle
 * 8-byte unaligned accesses. args points to raw_data within the event
 * and raw_data is guaranteed to be 8-byte unaligned because it is
 * preceded by raw_size which is a u32. So we need to copy args to a temp
 * variable to read it. Most notably this avoids extended load instructions
 * on unaligned addresses.
 *
 * When the tracepoint format of @sc is known, each argument is printed as
 * "name: value" using its formatter from sc->arg_scnprintf (plain "%ld" when
 * there is none). Otherwise six raw values are printed as "argN: value".
 */

static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned char *args, struct trace *trace,
				      struct thread *thread)
{
	size_t printed = 0;
	unsigned char *p;
	unsigned long val;

	if (sc->args != NULL) {
		struct format_field *field;
		u8 bit = 1;
		struct syscall_arg arg = {
			.idx	= 0,
			.mask	= 0,
			.trace	= trace,
			.thread = thread,
		};

		for (field = sc->args; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			/*
			 * Skip arguments whose bit is set in arg.mask. The mask is
			 * not written in this function; presumably a formatter
			 * called below sets bits through &arg - TODO confirm.
			 */
			if (arg.mask & bit)
				continue;

			/* special care for unaligned accesses */
			p = args + sizeof(unsigned long) * arg.idx;
			memcpy(&val, p, sizeof(val));

			/*
 			 * Suppress this argument if its value is zero and
 			 * we don't have a string associated in an
 			 * strarray for it.
 			 */
			if (val == 0 &&
			    !(sc->arg_scnprintf &&
			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
			      sc->arg_parm[arg.idx]))
				continue;

			/* ", " separator only once something has been printed */
			printed += scnprintf(bf + printed, size - printed,
					     "%s%s: ", printed ? ", " : "", field->name);
			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
				arg.val = val;
				if (sc->arg_parm)
					arg.parm = sc->arg_parm[arg.idx];
				printed += sc->arg_scnprintf[arg.idx](bf + printed,
								      size - printed, &arg);
			} else {
				printed += scnprintf(bf + printed, size - printed,
						     "%ld", val);
			}
		}
	} else {
		/* No format information: dump six raw values. */
		int i = 0;

		while (i < 6) {
			/* special care for unaligned accesses */
			p = args + sizeof(unsigned long) * i;
			memcpy(&val, p, sizeof(val));
			printed += scnprintf(bf + printed, size - printed,
					     "%sarg%d: %ld",
					     printed ? ", " : "", i, val);
			++i;
		}
	}

	return printed;
}
1865
/*
 * Signature shared by the per-tracepoint sample handlers in this file, e.g.
 * trace__sys_enter().
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1869
trace__syscall_info(struct trace * trace,struct perf_evsel * evsel,int id)1870 static struct syscall *trace__syscall_info(struct trace *trace,
1871 struct perf_evsel *evsel, int id)
1872 {
1873
1874 if (id < 0) {
1875
1876 /*
1877 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1878 * before that, leaving at a higher verbosity level till that is
1879 * explained. Reproduced with plain ftrace with:
1880 *
1881 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1882 * grep "NR -1 " /t/trace_pipe
1883 *
1884 * After generating some load on the machine.
1885 */
1886 if (verbose > 1) {
1887 static u64 n;
1888 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1889 id, perf_evsel__name(evsel), ++n);
1890 }
1891 return NULL;
1892 }
1893
1894 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1895 trace__read_syscall_info(trace, id))
1896 goto out_cant_read;
1897
1898 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1899 goto out_cant_read;
1900
1901 return &trace->syscalls.table[id];
1902
1903 out_cant_read:
1904 if (verbose) {
1905 fprintf(trace->output, "Problems reading syscall %d", id);
1906 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1907 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1908 fputs(" information\n", trace->output);
1909 }
1910 return NULL;
1911 }
1912
thread__update_stats(struct thread_trace * ttrace,int id,struct perf_sample * sample)1913 static void thread__update_stats(struct thread_trace *ttrace,
1914 int id, struct perf_sample *sample)
1915 {
1916 struct int_node *inode;
1917 struct stats *stats;
1918 u64 duration = 0;
1919
1920 inode = intlist__findnew(ttrace->syscall_stats, id);
1921 if (inode == NULL)
1922 return;
1923
1924 stats = inode->priv;
1925 if (stats == NULL) {
1926 stats = malloc(sizeof(struct stats));
1927 if (stats == NULL)
1928 return;
1929 init_stats(stats);
1930 inode->priv = stats;
1931 }
1932
1933 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1934 duration = sample->time - ttrace->entry_time;
1935
1936 update_stats(stats, duration);
1937 }
1938
trace__printf_interrupted_entry(struct trace * trace,struct perf_sample * sample)1939 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1940 {
1941 struct thread_trace *ttrace;
1942 u64 duration;
1943 size_t printed;
1944
1945 if (trace->current == NULL)
1946 return 0;
1947
1948 ttrace = thread__priv(trace->current);
1949
1950 if (!ttrace->entry_pending)
1951 return 0;
1952
1953 duration = sample->time - ttrace->entry_time;
1954
1955 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1956 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1957 ttrace->entry_pending = false;
1958
1959 return printed;
1960 }
1961
/*
 * Handler for the sys_enter tracepoint.
 *
 * Formats "name(args" into the per-thread entry_str buffer.  For syscalls
 * flagged is_exit the line is printed right away (unless a duration filter
 * or summary-only mode is active); for all others it is left pending so that
 * trace__sys_exit() can print it together with the return value.
 *
 * Returns 0 on success, -1 when the syscall info or the per-thread state
 * cannot be obtained.
 */
static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample)
{
	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;
	struct thread *thread;
	size_t len;
	char *buf;
	void *args;
	int err = -1;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);

	/* The entry buffer is allocated once per thread and then reused */
	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(trace__entry_str_size);
		if (!ttrace->entry_str)
			goto out_put;
	}

	/* Flush any other entry that is still waiting for its exit */
	if (!trace->summary_only)
		trace__printf_interrupted_entry(trace, sample);

	ttrace->entry_time = sample->time;

	buf = ttrace->entry_str;
	len = scnprintf(buf, trace__entry_str_size, "%s(", sc->name);
	len += syscall__scnprintf_args(sc, buf + len, trace__entry_str_size - len,
				       args, trace, thread);

	if (!sc->is_exit) {
		ttrace->entry_pending = true;
		/* See trace__vfs_getname & trace__sys_exit */
		ttrace->filename.pending_open = false;
	} else if (!trace->duration_filter && !trace->summary_only) {
		trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
		fprintf(trace->output, "%-70s\n", ttrace->entry_str);
	}

	/* Remember which thread entered last, holding a reference on it */
	if (trace->current != thread) {
		thread__put(trace->current);
		trace->current = thread__get(thread);
	}

	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2020
/*
 * Handler for the sys_exit tracepoint.
 *
 * Updates the per-thread statistics (in summary mode), associates the
 * pathname captured by trace__vfs_getname() with the fd returned by a
 * successful open, and prints the syscall line with its return value unless
 * it is filtered out by duration or by summary-only mode.
 *
 * Returns 0 on success, -1 when the syscall info or the per-thread state
 * cannot be obtained.
 */
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   union perf_event *event __maybe_unused,
			   struct perf_sample *sample)
{
	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;
	struct thread *thread;
	u64 duration = 0;
	int err = -1;
	long ret;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (trace->summary)
		thread__update_stats(ttrace, id, sample);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	/* A successful open: remember which path the new fd refers to */
	if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
		ttrace->filename.pending_open = false;
		++trace->stats.vfs_getname;
	}

	ttrace->exit_time = sample->time;

	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;
	} else if (trace->duration_filter) {
		/* No entry seen, so no duration to compare against the filter */
		goto out;
	}

	if (trace->summary_only)
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);

	if (ttrace->entry_pending) {
		fprintf(trace->output, "%-70s", ttrace->entry_str);
	} else {
		/* The entry line was already flushed by an intervening event */
		fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		fprintf(trace->output, "]: %s()", sc->name);
	}

	/* Pick the return value format; plain signed decimal is the fallback */
	if (sc->fmt != NULL && ret < 0 && sc->fmt->errmsg) {
		char bf[STRERR_BUFSIZE];
		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
			   *e = audit_errno_to_name(-ret);

		fprintf(trace->output, ") = -1 %s %s", e, emsg);
	} else if (sc->fmt != NULL && ret == 0 && sc->fmt->timeout) {
		fprintf(trace->output, ") = 0 Timeout");
	} else if (sc->fmt != NULL && sc->fmt->hexret) {
		fprintf(trace->output, ") = %#lx", ret);
	} else {
		fprintf(trace->output, ") = %ld", ret);
	}

	fputc('\n', trace->output);
out:
	ttrace->entry_pending = false;
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2097
/*
 * Handler for the probe:vfs_getname event.
 *
 * Saves the "pathname" payload in the per-thread state so that
 * trace__sys_exit() can associate it with the fd returned by open, and, when
 * ttrace->filename.ptr is set, splices the pathname into the pending
 * entry_str at filename.entry_str_pos (keeping only the tail of the name if
 * the buffer has not enough room left).
 *
 * The reference obtained from machine__findnew_thread() is dropped on every
 * path that got a thread, like the other handlers in this file do.
 *
 * Always returns 0.
 */
static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	struct thread_trace *ttrace;
	size_t filename_len, entry_str_len, to_move;
	ssize_t remaining_space;
	char *pos;
	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");

	if (!thread)
		goto out;

	ttrace = thread__priv(thread);
	if (!ttrace)
		goto out_put;

	filename_len = strlen(filename);

	/*
	 * Grow the saved-name buffer when needed.  The NULL test covers an
	 * empty pathname arriving before any buffer was ever allocated, which
	 * would otherwise make the strcpy() below write through NULL.
	 */
	if (ttrace->filename.name == NULL ||
	    ttrace->filename.namelen < filename_len) {
		char *f = realloc(ttrace->filename.name, filename_len + 1);

		if (f == NULL)
			goto out_put;

		ttrace->filename.namelen = filename_len;
		ttrace->filename.name = f;
	}

	strcpy(ttrace->filename.name, filename);
	ttrace->filename.pending_open = true;

	/* Nothing to patch into the pending entry line */
	if (!ttrace->filename.ptr)
		goto out_put;

	entry_str_len = strlen(ttrace->entry_str);
	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
	if (remaining_space <= 0)
		goto out_put;

	/* Not enough room: keep only the last remaining_space characters */
	if (filename_len > (size_t)remaining_space) {
		filename += filename_len - remaining_space;
		filename_len = remaining_space;
	}

	/* Open a gap at entry_str_pos and copy the pathname into it */
	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
	memmove(pos + filename_len, pos, to_move);
	memcpy(pos, filename, filename_len);

	ttrace->filename.ptr = 0;
	ttrace->filename.entry_str_pos = 0;
out_put:
	thread__put(thread);
out:
	return 0;
}
2154
/*
 * Handler for sched:sched_stat_runtime.
 *
 * Adds the event's "runtime" field, converted to milliseconds, to both the
 * per-thread and the global runtime totals.  If the per-thread state cannot
 * be obtained the raw event fields are dumped to the output instead.
 *
 * Always returns 0.
 */
static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
				     union perf_event *event __maybe_unused,
				     struct perf_sample *sample)
{
	u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
	struct thread *thread = machine__findnew_thread(trace->host,
							sample->pid,
							sample->tid);
	struct thread_trace *ttrace = thread__trace(thread, trace->output);

	if (ttrace != NULL) {
		ttrace->runtime_ms += runtime_ms;
		trace->runtime_ms += runtime_ms;
	} else {
		fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
			evsel->name,
			perf_evsel__strval(evsel, sample, "comm"),
			(pid_t)perf_evsel__intval(evsel, sample, "pid"),
			runtime,
			perf_evsel__intval(evsel, sample, "vruntime"));
	}

	thread__put(thread);
	return 0;
}
2184
/*
 * Generic handler for events other than the syscall tracepoints: prints the
 * timestamp, the event name and, when a tracepoint format is available, the
 * formatted payload.  A pending syscall entry line is flushed first.
 *
 * Always returns 0.
 */
static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample)
{
	FILE *out = trace->output;

	trace__printf_interrupted_entry(trace, sample);
	trace__fprintf_tstamp(trace, sample->time, out);

	/* Placeholder for the duration column used by syscall lines */
	if (trace->trace_syscalls)
		fprintf(out, "( ): ");

	fprintf(out, "%s:", evsel->name);

	if (evsel->tp_format)
		event_format__fprintf(evsel->tp_format, sample->cpu,
				      sample->raw_data, sample->raw_size, out);

	fprintf(out, ")\n");
	return 0;
}
2206
print_location(FILE * f,struct perf_sample * sample,struct addr_location * al,bool print_dso,bool print_sym)2207 static void print_location(FILE *f, struct perf_sample *sample,
2208 struct addr_location *al,
2209 bool print_dso, bool print_sym)
2210 {
2211
2212 if ((verbose || print_dso) && al->map)
2213 fprintf(f, "%s@", al->map->dso->long_name);
2214
2215 if ((verbose || print_sym) && al->sym)
2216 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2217 al->addr - al->sym->start);
2218 else if (al->map)
2219 fprintf(f, "0x%" PRIx64, al->addr);
2220 else
2221 fprintf(f, "0x%" PRIx64, sample->addr);
2222 }
2223
trace__pgfault(struct trace * trace,struct perf_evsel * evsel,union perf_event * event,struct perf_sample * sample)2224 static int trace__pgfault(struct trace *trace,
2225 struct perf_evsel *evsel,
2226 union perf_event *event,
2227 struct perf_sample *sample)
2228 {
2229 struct thread *thread;
2230 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2231 struct addr_location al;
2232 char map_type = 'd';
2233 struct thread_trace *ttrace;
2234 int err = -1;
2235
2236 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2237 ttrace = thread__trace(thread, trace->output);
2238 if (ttrace == NULL)
2239 goto out_put;
2240
2241 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2242 ttrace->pfmaj++;
2243 else
2244 ttrace->pfmin++;
2245
2246 if (trace->summary_only)
2247 goto out;
2248
2249 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2250 sample->ip, &al);
2251
2252 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2253
2254 fprintf(trace->output, "%sfault [",
2255 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2256 "maj" : "min");
2257
2258 print_location(trace->output, sample, &al, false, true);
2259
2260 fprintf(trace->output, "] => ");
2261
2262 thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2263 sample->addr, &al);
2264
2265 if (!al.map) {
2266 thread__find_addr_location(thread, cpumode,
2267 MAP__FUNCTION, sample->addr, &al);
2268
2269 if (al.map)
2270 map_type = 'x';
2271 else
2272 map_type = '?';
2273 }
2274
2275 print_location(trace->output, sample, &al, true, false);
2276
2277 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2278 out:
2279 err = 0;
2280 out_put:
2281 thread__put(thread);
2282 return err;
2283 }
2284
skip_sample(struct trace * trace,struct perf_sample * sample)2285 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2286 {
2287 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2288 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2289 return false;
2290
2291 if (trace->pid_list || trace->tid_list)
2292 return true;
2293
2294 return false;
2295 }
2296
/*
 * perf_tool sample callback: dispatches a sample to the handler installed on
 * its evsel, after applying the pid/tid filter and latching the base
 * timestamp on the first accepted sample (unless full_time is set).
 *
 * The handler's return value is ignored; this always returns 0.
 */
static int trace__process_sample(struct perf_tool *tool,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_evsel *evsel,
				 struct machine *machine __maybe_unused)
{
	struct trace *trace = container_of(tool, struct trace, tool);
	tracepoint_handler handler = evsel->handler;

	if (skip_sample(trace, sample))
		return 0;

	if (!trace->full_time && trace->base_time == 0)
		trace->base_time = sample->time;

	/* Events without a handler are not counted */
	if (!handler)
		return 0;

	++trace->nr_events;
	handler(trace, evsel, event, sample);

	return 0;
}
2321
parse_target_str(struct trace * trace)2322 static int parse_target_str(struct trace *trace)
2323 {
2324 if (trace->opts.target.pid) {
2325 trace->pid_list = intlist__new(trace->opts.target.pid);
2326 if (trace->pid_list == NULL) {
2327 pr_err("Error parsing process id string\n");
2328 return -EINVAL;
2329 }
2330 }
2331
2332 if (trace->opts.target.tid) {
2333 trace->tid_list = intlist__new(trace->opts.target.tid);
2334 if (trace->tid_list == NULL) {
2335 pr_err("Error parsing thread id string\n");
2336 return -EINVAL;
2337 }
2338 }
2339
2340 return 0;
2341 }
2342
trace__record(struct trace * trace,int argc,const char ** argv)2343 static int trace__record(struct trace *trace, int argc, const char **argv)
2344 {
2345 unsigned int rec_argc, i, j;
2346 const char **rec_argv;
2347 const char * const record_args[] = {
2348 "record",
2349 "-R",
2350 "-m", "1024",
2351 "-c", "1",
2352 };
2353
2354 const char * const sc_args[] = { "-e", };
2355 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2356 const char * const majpf_args[] = { "-e", "major-faults" };
2357 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2358 const char * const minpf_args[] = { "-e", "minor-faults" };
2359 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2360
2361 /* +1 is for the event string below */
2362 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2363 majpf_args_nr + minpf_args_nr + argc;
2364 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2365
2366 if (rec_argv == NULL)
2367 return -ENOMEM;
2368
2369 j = 0;
2370 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2371 rec_argv[j++] = record_args[i];
2372
2373 if (trace->trace_syscalls) {
2374 for (i = 0; i < sc_args_nr; i++)
2375 rec_argv[j++] = sc_args[i];
2376
2377 /* event string may be different for older kernels - e.g., RHEL6 */
2378 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2379 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2380 else if (is_valid_tracepoint("syscalls:sys_enter"))
2381 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2382 else {
2383 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2384 return -1;
2385 }
2386 }
2387
2388 if (trace->trace_pgfaults & TRACE_PFMAJ)
2389 for (i = 0; i < majpf_args_nr; i++)
2390 rec_argv[j++] = majpf_args[i];
2391
2392 if (trace->trace_pgfaults & TRACE_PFMIN)
2393 for (i = 0; i < minpf_args_nr; i++)
2394 rec_argv[j++] = minpf_args[i];
2395
2396 for (i = 0; i < (unsigned int)argc; i++)
2397 rec_argv[j++] = argv[i];
2398
2399 return cmd_record(j, rec_argv, NULL);
2400 }
2401
2402 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2403
perf_evlist__add_vfs_getname(struct perf_evlist * evlist)2404 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2405 {
2406 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2407
2408 if (IS_ERR(evsel))
2409 return false;
2410
2411 if (perf_evsel__field(evsel, "pathname") == NULL) {
2412 perf_evsel__delete(evsel);
2413 return false;
2414 }
2415
2416 evsel->handler = trace__vfs_getname;
2417 perf_evlist__add(evlist, evsel);
2418 return true;
2419 }
2420
/*
 * Add a software page fault event to @evlist, handled by trace__pgfault().
 *
 * @config: the software event config value to use for the event (callers in
 *          this file pass PERF_COUNT_SW_PAGE_FAULTS_MAJ or
 *          PERF_COUNT_SW_PAGE_FAULTS_MIN)
 *
 * The event samples every occurrence (sample_period of 1) and requests
 * mmap_data.
 *
 * Returns 0 on success, -ENOMEM if the evsel cannot be allocated.
 */
static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
				    u64 config)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_SOFTWARE,
		.config		= config,
		.sample_period	= 1,
		.mmap_data	= 1,
	};
	struct perf_evsel *evsel;

	event_attr_init(&attr);

	evsel = perf_evsel__new(&attr);
	if (evsel == NULL)
		return -ENOMEM;

	evsel->handler = trace__pgfault;
	perf_evlist__add(evlist, evsel);

	return 0;
}
2444
trace__handle_event(struct trace * trace,union perf_event * event,struct perf_sample * sample)2445 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2446 {
2447 const u32 type = event->header.type;
2448 struct perf_evsel *evsel;
2449
2450 if (!trace->full_time && trace->base_time == 0)
2451 trace->base_time = sample->time;
2452
2453 if (type != PERF_RECORD_SAMPLE) {
2454 trace__process_event(trace, trace->host, event, sample);
2455 return;
2456 }
2457
2458 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2459 if (evsel == NULL) {
2460 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2461 return;
2462 }
2463
2464 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2465 sample->raw_data == NULL) {
2466 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2467 perf_evsel__name(evsel), sample->tid,
2468 sample->cpu, sample->raw_size);
2469 } else {
2470 tracepoint_handler handler = evsel->handler;
2471 handler(trace, evsel, event, sample);
2472 }
2473 }
2474
trace__add_syscall_newtp(struct trace * trace)2475 static int trace__add_syscall_newtp(struct trace *trace)
2476 {
2477 int ret = -1;
2478 struct perf_evlist *evlist = trace->evlist;
2479 struct perf_evsel *sys_enter, *sys_exit;
2480
2481 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2482 if (sys_enter == NULL)
2483 goto out;
2484
2485 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2486 goto out_delete_sys_enter;
2487
2488 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2489 if (sys_exit == NULL)
2490 goto out_delete_sys_enter;
2491
2492 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2493 goto out_delete_sys_exit;
2494
2495 perf_evlist__add(evlist, sys_enter);
2496 perf_evlist__add(evlist, sys_exit);
2497
2498 trace->syscalls.events.sys_enter = sys_enter;
2499 trace->syscalls.events.sys_exit = sys_exit;
2500
2501 ret = 0;
2502 out:
2503 return ret;
2504
2505 out_delete_sys_exit:
2506 perf_evsel__delete_priv(sys_exit);
2507 out_delete_sys_enter:
2508 perf_evsel__delete_priv(sys_enter);
2509 goto out;
2510 }
2511
trace__set_ev_qualifier_filter(struct trace * trace)2512 static int trace__set_ev_qualifier_filter(struct trace *trace)
2513 {
2514 int err = -1;
2515 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2516 trace->ev_qualifier_ids.nr,
2517 trace->ev_qualifier_ids.entries);
2518
2519 if (filter == NULL)
2520 goto out_enomem;
2521
2522 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2523 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2524
2525 free(filter);
2526 out:
2527 return err;
2528 out_enomem:
2529 errno = ENOMEM;
2530 goto out;
2531 }
2532
trace__run(struct trace * trace,int argc,const char ** argv)2533 static int trace__run(struct trace *trace, int argc, const char **argv)
2534 {
2535 struct perf_evlist *evlist = trace->evlist;
2536 struct perf_evsel *evsel;
2537 int err = -1, i;
2538 unsigned long before;
2539 const bool forks = argc > 0;
2540 bool draining = false;
2541
2542 trace->live = true;
2543
2544 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2545 goto out_error_raw_syscalls;
2546
2547 if (trace->trace_syscalls)
2548 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2549
2550 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2551 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2552 goto out_error_mem;
2553 }
2554
2555 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2556 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2557 goto out_error_mem;
2558
2559 if (trace->sched &&
2560 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2561 trace__sched_stat_runtime))
2562 goto out_error_sched_stat_runtime;
2563
2564 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2565 if (err < 0) {
2566 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2567 goto out_delete_evlist;
2568 }
2569
2570 err = trace__symbols_init(trace, evlist);
2571 if (err < 0) {
2572 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2573 goto out_delete_evlist;
2574 }
2575
2576 perf_evlist__config(evlist, &trace->opts);
2577
2578 signal(SIGCHLD, sig_handler);
2579 signal(SIGINT, sig_handler);
2580
2581 if (forks) {
2582 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2583 argv, false, NULL);
2584 if (err < 0) {
2585 fprintf(trace->output, "Couldn't run the workload!\n");
2586 goto out_delete_evlist;
2587 }
2588 }
2589
2590 err = perf_evlist__open(evlist);
2591 if (err < 0)
2592 goto out_error_open;
2593
2594 /*
2595 * Better not use !target__has_task() here because we need to cover the
2596 * case where no threads were specified in the command line, but a
2597 * workload was, and in that case we will fill in the thread_map when
2598 * we fork the workload in perf_evlist__prepare_workload.
2599 */
2600 if (trace->filter_pids.nr > 0)
2601 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2602 else if (thread_map__pid(evlist->threads, 0) == -1)
2603 err = perf_evlist__set_filter_pid(evlist, getpid());
2604
2605 if (err < 0)
2606 goto out_error_mem;
2607
2608 if (trace->ev_qualifier_ids.nr > 0) {
2609 err = trace__set_ev_qualifier_filter(trace);
2610 if (err < 0)
2611 goto out_errno;
2612
2613 pr_debug("event qualifier tracepoint filter: %s\n",
2614 trace->syscalls.events.sys_exit->filter);
2615 }
2616
2617 err = perf_evlist__apply_filters(evlist, &evsel);
2618 if (err < 0)
2619 goto out_error_apply_filters;
2620
2621 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2622 if (err < 0)
2623 goto out_error_mmap;
2624
2625 if (!target__none(&trace->opts.target))
2626 perf_evlist__enable(evlist);
2627
2628 if (forks)
2629 perf_evlist__start_workload(evlist);
2630
2631 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2632 evlist->threads->nr > 1 ||
2633 perf_evlist__first(evlist)->attr.inherit;
2634 again:
2635 before = trace->nr_events;
2636
2637 for (i = 0; i < evlist->nr_mmaps; i++) {
2638 union perf_event *event;
2639
2640 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2641 struct perf_sample sample;
2642
2643 ++trace->nr_events;
2644
2645 err = perf_evlist__parse_sample(evlist, event, &sample);
2646 if (err) {
2647 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2648 goto next_event;
2649 }
2650
2651 trace__handle_event(trace, event, &sample);
2652 next_event:
2653 perf_evlist__mmap_consume(evlist, i);
2654
2655 if (interrupted)
2656 goto out_disable;
2657
2658 if (done && !draining) {
2659 perf_evlist__disable(evlist);
2660 draining = true;
2661 }
2662 }
2663 }
2664
2665 if (trace->nr_events == before) {
2666 int timeout = done ? 100 : -1;
2667
2668 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2669 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2670 draining = true;
2671
2672 goto again;
2673 }
2674 } else {
2675 goto again;
2676 }
2677
2678 out_disable:
2679 thread__zput(trace->current);
2680
2681 perf_evlist__disable(evlist);
2682
2683 if (!err) {
2684 if (trace->summary)
2685 trace__fprintf_thread_summary(trace, trace->output);
2686
2687 if (trace->show_tool_stats) {
2688 fprintf(trace->output, "Stats:\n "
2689 " vfs_getname : %" PRIu64 "\n"
2690 " proc_getname: %" PRIu64 "\n",
2691 trace->stats.vfs_getname,
2692 trace->stats.proc_getname);
2693 }
2694 }
2695
2696 out_delete_evlist:
2697 perf_evlist__delete(evlist);
2698 trace->evlist = NULL;
2699 trace->live = false;
2700 return err;
2701 {
2702 char errbuf[BUFSIZ];
2703
2704 out_error_sched_stat_runtime:
2705 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2706 goto out_error;
2707
2708 out_error_raw_syscalls:
2709 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2710 goto out_error;
2711
2712 out_error_mmap:
2713 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2714 goto out_error;
2715
2716 out_error_open:
2717 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2718
2719 out_error:
2720 fprintf(trace->output, "%s\n", errbuf);
2721 goto out_delete_evlist;
2722
2723 out_error_apply_filters:
2724 fprintf(trace->output,
2725 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2726 evsel->filter, perf_evsel__name(evsel), errno,
2727 strerror_r(errno, errbuf, sizeof(errbuf)));
2728 goto out_delete_evlist;
2729 }
2730 out_error_mem:
2731 fprintf(trace->output, "Not enough memory to run!\n");
2732 goto out_delete_evlist;
2733
2734 out_errno:
2735 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2736 goto out_delete_evlist;
2737 }
2738
trace__replay(struct trace * trace)2739 static int trace__replay(struct trace *trace)
2740 {
2741 const struct perf_evsel_str_handler handlers[] = {
2742 { "probe:vfs_getname", trace__vfs_getname, },
2743 };
2744 struct perf_data_file file = {
2745 .path = input_name,
2746 .mode = PERF_DATA_MODE_READ,
2747 .force = trace->force,
2748 };
2749 struct perf_session *session;
2750 struct perf_evsel *evsel;
2751 int err = -1;
2752
2753 trace->tool.sample = trace__process_sample;
2754 trace->tool.mmap = perf_event__process_mmap;
2755 trace->tool.mmap2 = perf_event__process_mmap2;
2756 trace->tool.comm = perf_event__process_comm;
2757 trace->tool.exit = perf_event__process_exit;
2758 trace->tool.fork = perf_event__process_fork;
2759 trace->tool.attr = perf_event__process_attr;
2760 trace->tool.tracing_data = perf_event__process_tracing_data;
2761 trace->tool.build_id = perf_event__process_build_id;
2762
2763 trace->tool.ordered_events = true;
2764 trace->tool.ordering_requires_timestamps = true;
2765
2766 /* add tid to output */
2767 trace->multiple_threads = true;
2768
2769 session = perf_session__new(&file, false, &trace->tool);
2770 if (session == NULL)
2771 return -1;
2772
2773 if (symbol__init(&session->header.env) < 0)
2774 goto out;
2775
2776 trace->host = &session->machines.host;
2777
2778 err = perf_session__set_tracepoints_handlers(session, handlers);
2779 if (err)
2780 goto out;
2781
2782 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2783 "raw_syscalls:sys_enter");
2784 /* older kernels have syscalls tp versus raw_syscalls */
2785 if (evsel == NULL)
2786 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2787 "syscalls:sys_enter");
2788
2789 if (evsel &&
2790 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2791 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2792 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2793 goto out;
2794 }
2795
2796 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2797 "raw_syscalls:sys_exit");
2798 if (evsel == NULL)
2799 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2800 "syscalls:sys_exit");
2801 if (evsel &&
2802 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2803 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2804 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2805 goto out;
2806 }
2807
2808 evlist__for_each(session->evlist, evsel) {
2809 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2810 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2811 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2812 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2813 evsel->handler = trace__pgfault;
2814 }
2815
2816 err = parse_target_str(trace);
2817 if (err != 0)
2818 goto out;
2819
2820 setup_pager();
2821
2822 err = perf_session__process_events(session);
2823 if (err)
2824 pr_err("Failed to process events, error %d", err);
2825
2826 else if (trace->summary)
2827 trace__fprintf_thread_summary(trace, trace->output);
2828
2829 out:
2830 perf_session__delete(session);
2831
2832 return err;
2833 }
2834
/*
 * Print the heading of the per-thread summary.
 *
 * Returns the value returned by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2843
thread__dump_stats(struct thread_trace * ttrace,struct trace * trace,FILE * fp)2844 static size_t thread__dump_stats(struct thread_trace *ttrace,
2845 struct trace *trace, FILE *fp)
2846 {
2847 struct stats *stats;
2848 size_t printed = 0;
2849 struct syscall *sc;
2850 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2851
2852 if (inode == NULL)
2853 return 0;
2854
2855 printed += fprintf(fp, "\n");
2856
2857 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2858 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2859 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
2860
2861 /* each int_node is a syscall */
2862 while (inode) {
2863 stats = inode->priv;
2864 if (stats) {
2865 double min = (double)(stats->min) / NSEC_PER_MSEC;
2866 double max = (double)(stats->max) / NSEC_PER_MSEC;
2867 double avg = avg_stats(stats);
2868 double pct;
2869 u64 n = (u64) stats->n;
2870
2871 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2872 avg /= NSEC_PER_MSEC;
2873
2874 sc = &trace->syscalls.table[inode->i];
2875 printed += fprintf(fp, " %-15s", sc->name);
2876 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2877 n, avg * n, min, avg);
2878 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2879 }
2880
2881 inode = intlist__next(inode);
2882 }
2883
2884 printed += fprintf(fp, "\n\n");
2885
2886 return printed;
2887 }
2888
/*
 * Context handed to the per-thread summary callback through its opaque
 * 'priv' argument.
 */
struct summary_data {
	FILE		*fp;		/* stream the summary is written to */
	struct trace	*trace;		/* tracer the summary is about */
	size_t		printed;	/* running count of characters printed */
};
2895
trace__fprintf_one_thread(struct thread * thread,void * priv)2896 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2897 {
2898 struct summary_data *data = priv;
2899 FILE *fp = data->fp;
2900 size_t printed = data->printed;
2901 struct trace *trace = data->trace;
2902 struct thread_trace *ttrace = thread__priv(thread);
2903 double ratio;
2904
2905 if (ttrace == NULL)
2906 return 0;
2907
2908 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2909
2910 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2911 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2912 printed += fprintf(fp, "%.1f%%", ratio);
2913 if (ttrace->pfmaj)
2914 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2915 if (ttrace->pfmin)
2916 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2917 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2918 printed += thread__dump_stats(ttrace, trace, fp);
2919
2920 data->printed += printed;
2921
2922 return 0;
2923 }
2924
trace__fprintf_thread_summary(struct trace * trace,FILE * fp)2925 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2926 {
2927 struct summary_data data = {
2928 .fp = fp,
2929 .trace = trace
2930 };
2931 data.printed = trace__fprintf_threads_header(fp);
2932
2933 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2934
2935 return data.printed;
2936 }
2937
/*
 * Option callback: store the numeric value of @str as the duration filter of
 * the struct trace passed through opt->value.
 *
 * The string is converted with atof(), so no validation is done.
 * Always returns 0.
 */
static int trace__set_duration(const struct option *opt, const char *str,
			       int unset __maybe_unused)
{
	struct trace *trace = opt->value;
	const double threshold = atof(str);

	trace->duration_filter = threshold;

	return 0;
}
2946
/*
 * Option callback: parse the comma separated pid list in @str into
 * trace->filter_pids of the struct trace passed through opt->value.
 *
 * Slot 0 of the resulting array always holds our own pid, followed by the
 * pids given by the user.
 *
 * Returns 0 on success, -1 if @str cannot be parsed or memory runs out.  On
 * allocation failure filter_pids.nr is reset to 0 so that it never describes
 * a NULL array, and the temporary intlist is released on every path.
 */
static int trace__set_filter_pids(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	int ret = -1;
	size_t i;
	struct trace *trace = opt->value;
	/*
	 * FIXME: introduce a intarray class, plain parse csv and create a
	 * { int nr, int entries[] } struct...
	 */
	struct intlist *list = intlist__new(str);

	if (list == NULL)
		return -1;

	/* +1 for our own pid in slot 0 */
	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
	trace->filter_pids.entries = calloc(i, sizeof(pid_t));

	if (trace->filter_pids.entries == NULL) {
		trace->filter_pids.nr = 0;
		goto out;
	}

	trace->filter_pids.entries[0] = getpid();

	for (i = 1; i < trace->filter_pids.nr; ++i)
		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;

	ret = 0;
out:
	/* The list was only needed for parsing */
	intlist__delete(list);
	return ret;
}
2978
trace__open_output(struct trace * trace,const char * filename)2979 static int trace__open_output(struct trace *trace, const char *filename)
2980 {
2981 struct stat st;
2982
2983 if (!stat(filename, &st) && st.st_size) {
2984 char oldname[PATH_MAX];
2985
2986 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2987 unlink(oldname);
2988 rename(filename, oldname);
2989 }
2990
2991 trace->output = fopen(filename, "w");
2992
2993 return trace->output == NULL ? -errno : 0;
2994 }
2995
/*
 * Option callback for -F/--pf.
 *
 * Maps @str ("all", "maj" or "min") to the matching TRACE_PF* bits and ORs
 * them into the int that opt->value points to.
 *
 * Returns 0 on success, -1 (leaving the target untouched) for any other
 * string.
 */
static int parse_pagefaults(const struct option *opt, const char *str,
			    int unset __maybe_unused)
{
	int *trace_pgfaults = opt->value;
	int mask;

	if (!strcmp(str, "all"))
		mask = TRACE_PFMAJ | TRACE_PFMIN;
	else if (!strcmp(str, "maj"))
		mask = TRACE_PFMAJ;
	else if (!strcmp(str, "min"))
		mask = TRACE_PFMIN;
	else
		return -1;

	*trace_pgfaults |= mask;

	return 0;
}
3012
evlist__set_evsel_handler(struct perf_evlist * evlist,void * handler)3013 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3014 {
3015 struct perf_evsel *evsel;
3016
3017 evlist__for_each(evlist, evsel)
3018 evsel->handler = handler;
3019 }
3020
/*
 * Entry point of the 'perf trace' builtin.
 *
 * Parses the command line and then, in order:
 *  - enables address and time sampling when pagefault tracing was requested;
 *  - installs trace__event_handler on any events already in the evlist;
 *  - hands over to trace__record() when the first remaining argument is the
 *    "record" subcommand;
 *  - refuses to run when neither syscalls, pagefaults nor events are selected;
 *  - opens the -o output file, if one was given;
 *  - builds and validates the -e syscall qualifier (a leading '!' negates it);
 *  - validates the target and its uid, defaulting to system wide when there is
 *    neither a workload nor a target;
 *  - replays input_name via trace__replay() if set, otherwise traces live via
 *    trace__run().
 *
 * Returns the value produced by trace__record(), trace__replay() or
 * trace__run(), or a non-zero error (e.g. -ENOMEM, -1) when setup fails.
 */
int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char *trace_usage[] = {
		"perf trace [<options>] [<command>]",
		"perf trace [<options>] -- <command> [<options>]",
		"perf trace record [<options>] [<command>]",
		"perf trace record [<options>] -- <command> [<options>]",
		NULL
	};
	struct trace trace = {
		.audit = {
			/* .open_id is filled in below, once .machine is set. */
			.machine = audit_detect_machine(),
		},
		.syscalls = {
			. max = -1,
		},
		.opts = {
			.target = {
				.uid	   = UINT_MAX,
				.uses_mmap = true,
			},
			.user_freq     = UINT_MAX,
			.user_interval = ULLONG_MAX,
			.no_buffering  = true,
			.mmap_pages    = UINT_MAX,
			.proc_map_timeout  = 500,
		},
		.output = stderr,
		.show_comm = true,
		.trace_syscalls = true,
	};
	const char *output_name = NULL;
	const char *ev_qualifier_str = NULL;
	const struct option trace_options[] = {
	OPT_CALLBACK(0, "event", &trace.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_BOOLEAN(0, "comm", &trace.show_comm,
		    "show the thread COMM next to its id"),
	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
		    "trace events on existing process id"),
	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
		    "trace events on existing thread id"),
	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
		     "pids to filter (by the kernel)", trace__set_filter_pids),
	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
		   "user to profile"),
	OPT_CALLBACK(0, "duration", &trace, "float",
		     "show only events with duration > N.M ms",
		     trace__set_duration),
	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
	OPT_BOOLEAN('T', "time", &trace.full_time,
		    "Show full timestamp, not time relative to first start"),
	OPT_BOOLEAN('s', "summary", &trace.summary_only,
		    "Show only syscall summary with statistics"),
	OPT_BOOLEAN('S', "with-summary", &trace.summary,
		    "Show all syscalls and summary with statistics"),
	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
		     "Trace pagefaults", parse_pagefaults, "maj"),
	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_END()
	};
	const char * const trace_subcommands[] = { "record", NULL };
	int err;
	char bf[BUFSIZ];

	/*
	 * Done here rather than in the initializer above: initializer
	 * expressions are indeterminately sequenced, so reading
	 * trace.audit.machine from within that list is not guaranteed to see
	 * the value set by the .machine initializer.
	 */
	trace.audit.open_id = audit_name_to_syscall("open", trace.audit.machine);

	signal(SIGSEGV, sighandler_dump_stack);
	signal(SIGFPE, sighandler_dump_stack);

	/* Must exist before option parsing: --event adds entries to it. */
	trace.evlist = perf_evlist__new();

	if (trace.evlist == NULL) {
		pr_err("Not enough memory to run!\n");
		err = -ENOMEM;
		goto out;
	}

	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);

	if (trace.trace_pgfaults) {
		trace.opts.sample_address = true;
		trace.opts.sample_time = true;
	}

	if (trace.evlist->nr_entries > 0)
		evlist__set_evsel_handler(trace.evlist, trace__event_handler);

	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
		return trace__record(&trace, argc-1, &argv[1]);

	/* summary_only implies summary option, but don't overwrite summary if set */
	if (trace.summary_only)
		trace.summary = trace.summary_only;

	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
		pr_err("Please specify something to trace.\n");
		return -1;
	}

	if (output_name != NULL) {
		err = trace__open_output(&trace, output_name);
		if (err < 0) {
			perror("failed to create output file");
			goto out;
		}
	}

	if (ev_qualifier_str != NULL) {
		const char *s = ev_qualifier_str;
		struct strlist_config slist_config = {
			.dirname = system_path(STRACE_GROUPS_DIR),
		};

		/* A leading '!' negates the qualifier; skip it before parsing. */
		trace.not_ev_qualifier = *s == '!';
		if (trace.not_ev_qualifier)
			++s;
		trace.ev_qualifier = strlist__new(s, &slist_config);
		if (trace.ev_qualifier == NULL) {
			fputs("Not enough memory to parse event qualifier",
			      trace.output);
			err = -ENOMEM;
			goto out_close;
		}

		err = trace__validate_ev_qualifier(&trace);
		if (err)
			goto out_close;
	}

	err = target__validate(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	err = target__parse_uid(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	/* No workload and no explicit target: trace the whole system. */
	if (!argc && target__none(&trace.opts.target))
		trace.opts.target.system_wide = true;

	if (input_name)
		err = trace__replay(&trace);
	else
		err = trace__run(&trace, argc, argv);

out_close:
	/* Only close what trace__open_output() opened, never stderr. */
	if (output_name != NULL)
		fclose(trace.output);
out:
	return err;
}
3198