1 /*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 #include "util/bpf-loader.h"
37 #include "callchain.h"
38 #include "syscalltbl.h"
39 #include "rb_resort.h"
40
41 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
42 #include <stdlib.h>
43 #include <linux/err.h>
44 #include <linux/filter.h>
45 #include <linux/audit.h>
46 #include <linux/random.h>
47 #include <linux/stringify.h>
48 #include <linux/time64.h>
49
50 #ifndef O_CLOEXEC
51 # define O_CLOEXEC 02000000
52 #endif
53
/*
 * Global state for one 'perf trace' session: the perf tool/evlist plumbing,
 * the syscall table being beautified, output destination, filters and
 * accumulated statistics.
 */
struct trace {
	struct perf_tool	tool;
	struct syscalltbl	*sctbl;
	struct {
		int		max;	/* highest syscall id in 'table' */
		struct syscall  *table;	/* indexed by syscall id */
		struct {
			/* raw_syscalls:sys_{enter,exit} tracepoint evsels */
			struct perf_evsel *sys_enter,
					  *sys_exit;
		}		events;
	} syscalls;
	struct record_opts	opts;
	struct perf_evlist	*evlist;
	struct machine		*host;
	struct thread		*current;	/* thread of the last event seen */
	u64			base_time;
	FILE			*output;
	unsigned long		nr_events;
	struct strlist		*ev_qualifier;	/* -e syscall name filter */
	struct {
		size_t		nr;
		int		*entries;	/* syscall ids for ev_qualifier */
	}			ev_qualifier_ids;
	struct intlist		*tid_list;
	struct intlist		*pid_list;
	struct {
		size_t		nr;
		pid_t		*entries;	/* pids to filter out (e.g. ourselves) */
	}			filter_pids;
	double			duration_filter;	/* --duration, in ms */
	double			runtime_ms;
	struct {
		/* tool self-stats: how filenames got resolved */
		u64		vfs_getname,
				proc_getname;
	} stats;
	unsigned int		max_stack;
	unsigned int		min_stack;
	bool			not_ev_qualifier;	/* ev_qualifier is an exclude list */
	bool			live;
	bool			full_time;
	bool			sched;
	bool			multiple_threads;
	bool			summary;
	bool			summary_only;
	bool			show_comm;
	bool			show_tool_stats;
	bool			trace_syscalls;
	bool			kernel_syscallchains;
	bool			force;
	bool			vfs_getname;	/* vfs_getname probe is in place */
	int			trace_pgfaults;	/* TRACE_PFMAJ/TRACE_PFMIN bits */
	int			open_id;	/* syscall id of open(2), for fd tracking */
};
107
/*
 * Accessor for one field of a tracepoint sample: 'offset' locates the field
 * inside the raw payload, and exactly one of the union callbacks decodes it
 * (fixed-width integer, possibly byte-swapped, or a raw pointer).
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
115
/*
 * Generate tp_field__u{8,16,32,64}(): read a host-endian unsigned integer
 * of the given width from the sample's raw data at the field's offset.
 * memcpy() is used because the payload may be unaligned.
 */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value; \
}

TP_UINT_FIELD(8);
TP_UINT_FIELD(16);
TP_UINT_FIELD(32);
TP_UINT_FIELD(64);
128
/*
 * Generate tp_field__swapped_u{16,32,64}(): as TP_UINT_FIELD() but
 * byte-swapping the value, for samples recorded on a host of the
 * opposite endianness.  (No u8 variant: a single byte has no order.)
 */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value);\
}

TP_UINT_FIELD__SWAPPED(16);
TP_UINT_FIELD__SWAPPED(32);
TP_UINT_FIELD__SWAPPED(64);
140
tp_field__init_uint(struct tp_field * field,struct format_field * format_field,bool needs_swap)141 static int tp_field__init_uint(struct tp_field *field,
142 struct format_field *format_field,
143 bool needs_swap)
144 {
145 field->offset = format_field->offset;
146
147 switch (format_field->size) {
148 case 1:
149 field->integer = tp_field__u8;
150 break;
151 case 2:
152 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
153 break;
154 case 4:
155 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
156 break;
157 case 8:
158 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
159 break;
160 default:
161 return -1;
162 }
163
164 return 0;
165 }
166
tp_field__ptr(struct tp_field * field,struct perf_sample * sample)167 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
168 {
169 return sample->raw_data + field->offset;
170 }
171
tp_field__init_ptr(struct tp_field * field,struct format_field * format_field)172 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
173 {
174 field->offset = format_field->offset;
175 field->pointer = tp_field__ptr;
176 return 0;
177 }
178
/*
 * Fields of interest in the raw_syscalls:sys_{enter,exit} tracepoints:
 * the syscall id plus either the args array (enter) or the return value
 * (exit) -- hence the union, an event only ever has one of the two.
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};
185
perf_evsel__init_tp_uint_field(struct perf_evsel * evsel,struct tp_field * field,const char * name)186 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
187 struct tp_field *field,
188 const char *name)
189 {
190 struct format_field *format_field = perf_evsel__field(evsel, name);
191
192 if (format_field == NULL)
193 return -1;
194
195 return tp_field__init_uint(field, format_field, evsel->needs_swap);
196 }
197
/* Init the syscall_tp member called 'name' (e.g. id) from the field of the same name. */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
201
/*
 * Look up tracepoint field 'name' in the evsel's format and attach a
 * raw-pointer accessor for it.  Returns -1 when the field doesn't exist.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt ? tp_field__init_ptr(field, fmt) : -1;
}
213
/* Init the syscall_tp member called 'name' (e.g. args) from the field of the same name. */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
217
/* Free the evsel's private syscall_tp state, then the evsel itself. */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	zfree(&evsel->priv);	/* must go first: perf_evsel__delete() frees 'evsel' */
	perf_evsel__delete(evsel);
}
223
perf_evsel__init_syscall_tp(struct perf_evsel * evsel,void * handler)224 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
225 {
226 evsel->priv = malloc(sizeof(struct syscall_tp));
227 if (evsel->priv != NULL) {
228 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
229 goto out_delete;
230
231 evsel->handler = handler;
232 return 0;
233 }
234
235 return -ENOMEM;
236
237 out_delete:
238 zfree(&evsel->priv);
239 return -ENOENT;
240 }
241
perf_evsel__syscall_newtp(const char * direction,void * handler)242 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
243 {
244 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
245
246 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
247 if (IS_ERR(evsel))
248 evsel = perf_evsel__newtp("syscalls", direction);
249
250 if (IS_ERR(evsel))
251 return NULL;
252
253 if (perf_evsel__init_syscall_tp(evsel, handler))
254 goto out_delete;
255
256 return evsel;
257
258 out_delete:
259 perf_evsel__delete_priv(evsel);
260 return NULL;
261 }
262
/* Read an integer syscall_tp member (id/ret) out of a sample. */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

/* Read a pointer syscall_tp member (args) out of a sample. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
270
/*
 * Context handed to every per-argument beautifier: the raw argument value,
 * the thread/trace it came from, an optional beautifier-specific parameter
 * (arg_parm), the argument index and a mask of args already consumed.
 */
struct syscall_arg {
	unsigned long val;
	struct thread *thread;
	struct trace  *trace;
	void	      *parm;
	u8	      idx;
	u8	      mask;
};
279
/*
 * A value -> name lookup table: entries[val - offset] is the printable
 * name; 'offset' handles enums that don't start at zero.
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};
285
/* Wrap a C string array into a strarray named strarray__<array>. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/* Same, for value ranges starting at 'off' instead of zero. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
296
__syscall_arg__scnprintf_strarray(char * bf,size_t size,const char * intfmt,struct syscall_arg * arg)297 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
298 const char *intfmt,
299 struct syscall_arg *arg)
300 {
301 struct strarray *sa = arg->parm;
302 int idx = arg->val - sa->offset;
303
304 if (idx < 0 || idx >= sa->nr_entries)
305 return scnprintf(bf, size, intfmt, arg->val);
306
307 return scnprintf(bf, size, "%s", sa->entries[idx]);
308 }
309
/* strarray beautifier with a decimal ("%d") fallback for unknown values. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}
315
316 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
317
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * gets rewritten to support all arches.
 */
/* strarray beautifier with a hex ("%#x") fallback, used for ioctl cmds. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
331
332 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
333 struct syscall_arg *arg);
334
335 #define SCA_FD syscall_arg__scnprintf_fd
336
337 #ifndef AT_FDCWD
338 #define AT_FDCWD -100
339 #endif
340
syscall_arg__scnprintf_fd_at(char * bf,size_t size,struct syscall_arg * arg)341 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
342 struct syscall_arg *arg)
343 {
344 int fd = arg->val;
345
346 if (fd == AT_FDCWD)
347 return scnprintf(bf, size, "CWD");
348
349 return syscall_arg__scnprintf_fd(bf, size, arg);
350 }
351
352 #define SCA_FDAT syscall_arg__scnprintf_fd_at
353
354 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
355 struct syscall_arg *arg);
356
357 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
358
syscall_arg__scnprintf_hex(char * bf,size_t size,struct syscall_arg * arg)359 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
360 struct syscall_arg *arg)
361 {
362 return scnprintf(bf, size, "%#lx", arg->val);
363 }
364
365 #define SCA_HEX syscall_arg__scnprintf_hex
366
syscall_arg__scnprintf_int(char * bf,size_t size,struct syscall_arg * arg)367 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
368 struct syscall_arg *arg)
369 {
370 return scnprintf(bf, size, "%d", arg->val);
371 }
372
373 #define SCA_INT syscall_arg__scnprintf_int
374
/* bpf(2) first argument: enum bpf_cmd names. */
static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);

/* epoll_ctl(2) op: EPOLL_CTL_ADD/DEL/MOD start at 1, hence the offset. */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

/* {get,set}itimer(2) 'which': ITIMER_REAL/VIRTUAL/PROF. */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);
386
/* keyctl(2) first argument: KEYCTL_* operation names. */
static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);

/* lseek(2) whence: SEEK_* names; DATA/HOLE only where the libc defines them. */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
405
/* fcntl(2) cmd: F_* names, in F_DUPFD..F_GETOWNER_UIDS numeric order. */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

/* {get,set}rlimit(2)/prlimit64(2) resource: RLIMIT_* names. */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* rt_sigprocmask(2) how: SIG_* names. */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

/* clock_gettime(2) clk_id: CLOCK_* names. */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);

/* socket(2)/socketpair(2) family: AF_* names in numeric order. */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
	"ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
440
syscall_arg__scnprintf_access_mode(char * bf,size_t size,struct syscall_arg * arg)441 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
442 struct syscall_arg *arg)
443 {
444 size_t printed = 0;
445 int mode = arg->val;
446
447 if (mode == F_OK) /* 0 */
448 return scnprintf(bf, size, "F");
449 #define P_MODE(n) \
450 if (mode & n##_OK) { \
451 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
452 mode &= ~n##_OK; \
453 }
454
455 P_MODE(R);
456 P_MODE(W);
457 P_MODE(X);
458 #undef P_MODE
459
460 if (mode)
461 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
462
463 return printed;
464 }
465
466 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
467
468 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
469 struct syscall_arg *arg);
470
471 #define SCA_FILENAME syscall_arg__scnprintf_filename
472
syscall_arg__scnprintf_pipe_flags(char * bf,size_t size,struct syscall_arg * arg)473 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
474 struct syscall_arg *arg)
475 {
476 int printed = 0, flags = arg->val;
477
478 #define P_FLAG(n) \
479 if (flags & O_##n) { \
480 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
481 flags &= ~O_##n; \
482 }
483
484 P_FLAG(CLOEXEC);
485 P_FLAG(NONBLOCK);
486 #undef P_FLAG
487
488 if (flags)
489 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
490
491 return printed;
492 }
493
494 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
495
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
#define TCGETS		0x5401

/*
 * ioctl(2) tty request names, indexed from TCGETS (0x5401) via the
 * strarray offset.  The designated indices ([0x27], [0x50], [0x60])
 * skip gaps in the ioctl number space.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
};

static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
#endif /* defined(__i386__) || defined(__x86_64__) */
522
523 #ifndef GRND_NONBLOCK
524 #define GRND_NONBLOCK 0x0001
525 #endif
526 #ifndef GRND_RANDOM
527 #define GRND_RANDOM 0x0002
528 #endif
529
syscall_arg__scnprintf_getrandom_flags(char * bf,size_t size,struct syscall_arg * arg)530 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
531 struct syscall_arg *arg)
532 {
533 int printed = 0, flags = arg->val;
534
535 #define P_FLAG(n) \
536 if (flags & GRND_##n) { \
537 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
538 flags &= ~GRND_##n; \
539 }
540
541 P_FLAG(RANDOM);
542 P_FLAG(NONBLOCK);
543 #undef P_FLAG
544
545 if (flags)
546 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
547
548 return printed;
549 }
550
551 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
552
/* Shorthand for a syscall_fmt entry: beautify argument 'arg' via strarray__<array>. */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
556
557 #include "trace/beauty/eventfd.c"
558 #include "trace/beauty/flock.c"
559 #include "trace/beauty/futex_op.c"
560 #include "trace/beauty/mmap.c"
561 #include "trace/beauty/mode_t.c"
562 #include "trace/beauty/msg_flags.c"
563 #include "trace/beauty/open_flags.c"
564 #include "trace/beauty/perf_event_open.c"
565 #include "trace/beauty/pid.c"
566 #include "trace/beauty/sched_policy.c"
567 #include "trace/beauty/seccomp.c"
568 #include "trace/beauty/signum.c"
569 #include "trace/beauty/socket_type.c"
570 #include "trace/beauty/waitid_options.c"
571
/*
 * Per-syscall pretty-printing configuration: optional per-argument
 * beautifiers (indexed by argument position), their parameters, and how
 * the return value should be interpreted.
 *
 * NOTE: this table is looked up with bsearch() by syscall_fmt__find(),
 * so entries MUST be kept sorted alphabetically by ->name.
 */
static struct syscall_fmt {
	const char *name;
	const char *alias;	/* name the kernel/tracefs uses, when it differs */
	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
	void	   *arg_parm[6];	/* handed to the beautifier via arg->parm */
	bool	   errmsg;	/* negative return is -errno: print its name */
	bool	   errpid;	/* negative return is an error, else a pid */
	bool	   timeout;	/* zero return means timeout */
	bool	   hexret;	/* print the return value in hex (e.g. mmap) */
} syscall_fmts[] = {
	{ .name	    = "access",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
	{ .name	    = "bpf",	    .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
	{ .name	    = "brk",	    .hexret = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
	{ .name	    = "chdir",	    .errmsg = true, },
	{ .name	    = "chmod",	    .errmsg = true, },
	{ .name	    = "chroot",	    .errmsg = true, },
	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
	{ .name	    = "clone",	    .errpid = true, },
	{ .name	    = "close",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
	{ .name	    = "connect",    .errmsg = true, },
	{ .name	    = "creat",	    .errmsg = true, },
	{ .name	    = "dup",	    .errmsg = true, },
	{ .name	    = "dup2",	    .errmsg = true, },
	{ .name	    = "dup3",	    .errmsg = true, },
	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
	{ .name	    = "eventfd2",   .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
	{ .name	    = "faccessat",  .errmsg = true, },
	{ .name	    = "fadvise64",  .errmsg = true, },
	{ .name	    = "fallocate",  .errmsg = true, },
	{ .name	    = "fchdir",	    .errmsg = true, },
	{ .name	    = "fchmod",	    .errmsg = true, },
	{ .name	    = "fchmodat",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
	{ .name	    = "fchown",	    .errmsg = true, },
	{ .name	    = "fchownat",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
	{ .name	    = "fcntl",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
	{ .name	    = "fdatasync",  .errmsg = true, },
	{ .name	    = "flock",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
	{ .name	    = "fsetxattr",  .errmsg = true, },
	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
	{ .name	    = "fstatfs",    .errmsg = true, },
	{ .name	    = "fsync",    .errmsg = true, },
	{ .name	    = "ftruncate", .errmsg = true, },
	{ .name	    = "futex",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
	{ .name	    = "futimesat", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
	{ .name	    = "getdents",   .errmsg = true, },
	{ .name	    = "getdents64", .errmsg = true, },
	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
	{ .name	    = "getpid",	    .errpid = true, },
	{ .name	    = "getpgid",    .errpid = true, },
	{ .name	    = "getppid",    .errpid = true, },
	{ .name	    = "getrandom",  .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
	{ .name	    = "getxattr",   .errmsg = true, },
	{ .name	    = "inotify_add_watch",	    .errmsg = true, },
	{ .name	    = "ioctl",	    .errmsg = true,
	  .arg_scnprintf = {
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
			     [1] = SCA_STRHEXARRAY, /* cmd */
			     [2] = SCA_HEX, /* arg */ },
	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
#else
			     [2] = SCA_HEX, /* arg */ }, },
#endif
	{ .name	    = "keyctl",	    .errmsg = true, STRARRAY(0, option, keyctl_options), },
	{ .name	    = "kill",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "lchown",    .errmsg = true, },
	{ .name	    = "lgetxattr",  .errmsg = true, },
	{ .name	    = "linkat",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
	{ .name	    = "listxattr",  .errmsg = true, },
	{ .name	    = "llistxattr", .errmsg = true, },
	{ .name	    = "lremovexattr",  .errmsg = true, },
	{ .name	    = "lseek",	    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
	{ .name	    = "lsetxattr",  .errmsg = true, },
	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat", },
	{ .name	    = "lsxattr",    .errmsg = true, },
	{ .name     = "madvise",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
			     [2] = SCA_MADV_BHV, /* behavior */ }, },
	{ .name	    = "mkdir",    .errmsg = true, },
	{ .name	    = "mkdirat",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
	{ .name	    = "mknod",      .errmsg = true, },
	{ .name	    = "mknodat",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
	{ .name	    = "mlock",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
	{ .name	    = "mlockall",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
	{ .name	    = "mmap",	    .hexret = true,
/* The standard mmap maps to old_mmap on s390x */
#if defined(__s390x__)
	.alias = "old_mmap",
#endif
	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
			     [2] = SCA_MMAP_PROT, /* prot */
			     [3] = SCA_MMAP_FLAGS, /* flags */ }, },
	{ .name	    = "mprotect",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
			     [2] = SCA_MMAP_PROT, /* prot */ }, },
	{ .name	    = "mq_unlink", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
	{ .name	    = "mremap",	    .hexret = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
			     [3] = SCA_MREMAP_FLAGS, /* flags */
			     [4] = SCA_HEX, /* new_addr */ }, },
	{ .name	    = "munlock",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
	{ .name	    = "munmap",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
	{ .name	    = "name_to_handle_at", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
	{ .name	    = "newfstatat", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
	{ .name	    = "open",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
	{ .name	    = "open_by_handle_at", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
	{ .name	    = "openat",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
	{ .name	    = "perf_event_open", .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_INT, /* cpu */
			     [3] = SCA_FD,  /* group_fd */
			     [4] = SCA_PERF_FLAGS, /* flags */ }, },
	{ .name	    = "pipe2",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64", },
	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread", },
	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64", },
	{ .name	    = "pwritev",    .errmsg = true, },
	{ .name	    = "read",	    .errmsg = true, },
	{ .name	    = "readlink",   .errmsg = true, },
	{ .name	    = "readlinkat", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
	{ .name	    = "readv",	    .errmsg = true, },
	{ .name	    = "recvfrom",   .errmsg = true,
	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "recvmmsg",   .errmsg = true,
	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "recvmsg",    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "removexattr", .errmsg = true, },
	{ .name	    = "renameat",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
	{ .name	    = "rmdir",    .errmsg = true, },
	{ .name	    = "rt_sigaction", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "sched_getattr",	      .errmsg = true, },
	{ .name	    = "sched_setattr",	      .errmsg = true, },
	{ .name	    = "sched_setscheduler",   .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
	{ .name	    = "seccomp", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
			     [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
	{ .name	    = "sendmmsg",    .errmsg = true,
	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "sendmsg",    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "sendto",	    .errmsg = true,
	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "set_tid_address", .errpid = true, },
	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
	{ .name	    = "setpgid",    .errmsg = true, },
	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
	{ .name	    = "setxattr",   .errmsg = true, },
	{ .name	    = "shutdown",   .errmsg = true, },
	{ .name	    = "socket",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
			     [1] = SCA_SK_TYPE, /* type */ },
	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
	{ .name	    = "socketpair", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
			     [1] = SCA_SK_TYPE, /* type */ },
	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
	{ .name	    = "statfs",	    .errmsg = true, },
	{ .name	    = "swapoff",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
	{ .name	    = "swapon",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
	{ .name	    = "symlinkat",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
	{ .name	    = "tgkill",	    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "tkill",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "truncate",   .errmsg = true, },
	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
	{ .name	    = "unlinkat",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
	{ .name	    = "utime",  .errmsg = true, },
	{ .name	    = "utimensat",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
	{ .name	    = "utimes",  .errmsg = true, },
	{ .name	    = "vmsplice",  .errmsg = true, },
	{ .name	    = "wait4",	    .errpid = true,
	  .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
	{ .name	    = "waitid",	    .errpid = true,
	  .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
	{ .name	    = "write",	    .errmsg = true, },
	{ .name	    = "writev",	    .errmsg = true, },
};
805
/* bsearch() comparator: key is a syscall name, element a syscall_fmt entry. */
static int syscall_fmt__cmp(const void *name, const void *fmtp)
{
	const struct syscall_fmt *fmt = fmtp;
	return strcmp(name, fmt->name);
}
811
syscall_fmt__find(const char * name)812 static struct syscall_fmt *syscall_fmt__find(const char *name)
813 {
814 const int nmemb = ARRAY_SIZE(syscall_fmts);
815 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
816 }
817
/*
 * Everything we know about one syscall: its tracefs event format, the
 * list of arguments, and the formatting hooks resolved from syscall_fmts[].
 */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;
	const char	    *name;
	bool		    is_exit;	/* never returns (e.g. exit_group) */
	struct syscall_fmt  *fmt;	/* NULL when no special formatting */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;
};
828
829 /*
830 * We need to have this 'calculated' boolean because in some cases we really
831 * don't know what is the duration of a syscall, for instance, when we start
832 * a session and some threads are waiting for a syscall to finish, say 'poll',
 * in which case all we can do is to print "( ? )" for duration and for the
834 * start timestamp.
835 */
fprintf_duration(unsigned long t,bool calculated,FILE * fp)836 static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
837 {
838 double duration = (double)t / NSEC_PER_MSEC;
839 size_t printed = fprintf(fp, "(");
840
841 if (!calculated)
842 printed += fprintf(fp, " ? ");
843 else if (duration >= 1.0)
844 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
845 else if (duration >= 0.01)
846 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
847 else
848 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
849 return printed + fprintf(fp, "): ");
850 }
851
852 /**
853 * filename.ptr: The filename char pointer that will be vfs_getname'd
854 * filename.entry_str_pos: Where to insert the string translated from
855 * filename.ptr by the vfs_getname tracepoint/kprobe.
856 */
857 struct thread_trace {
858 u64 entry_time;
859 u64 exit_time;
860 bool entry_pending;
861 unsigned long nr_events;
862 unsigned long pfmaj, pfmin;
863 char *entry_str;
864 double runtime_ms;
865 struct {
866 unsigned long ptr;
867 short int entry_str_pos;
868 bool pending_open;
869 unsigned int namelen;
870 char *name;
871 } filename;
872 struct {
873 int max;
874 char **table;
875 } paths;
876
877 struct intlist *syscall_stats;
878 };
879
thread_trace__new(void)880 static struct thread_trace *thread_trace__new(void)
881 {
882 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
883
884 if (ttrace)
885 ttrace->paths.max = -1;
886
887 ttrace->syscall_stats = intlist__new(NULL);
888
889 return ttrace;
890 }
891
thread__trace(struct thread * thread,FILE * fp)892 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
893 {
894 struct thread_trace *ttrace;
895
896 if (thread == NULL)
897 goto fail;
898
899 if (thread__priv(thread) == NULL)
900 thread__set_priv(thread, thread_trace__new());
901
902 if (thread__priv(thread) == NULL)
903 goto fail;
904
905 ttrace = thread__priv(thread);
906 ++ttrace->nr_events;
907
908 return ttrace;
909 fail:
910 color_fprintf(fp, PERF_COLOR_RED,
911 "WARNING: not enough memory, dropping samples!\n");
912 return NULL;
913 }
914
/* Page fault tracing flags — presumably or'ed into a trace->trace_pgfaults
 * style mask by code outside this chunk; TODO confirm against the option
 * parsing. */
#define TRACE_PFMAJ		(1 << 0)	/* major faults */
#define TRACE_PFMIN		(1 << 1)	/* minor faults */

/* Size of the per-thread buffer where the syscall entry line is built. */
static const size_t trace__entry_str_size = 2048;
919
trace__set_fd_pathname(struct thread * thread,int fd,const char * pathname)920 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
921 {
922 struct thread_trace *ttrace = thread__priv(thread);
923
924 if (fd > ttrace->paths.max) {
925 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
926
927 if (npath == NULL)
928 return -1;
929
930 if (ttrace->paths.max != -1) {
931 memset(npath + ttrace->paths.max + 1, 0,
932 (fd - ttrace->paths.max) * sizeof(char *));
933 } else {
934 memset(npath, 0, (fd + 1) * sizeof(char *));
935 }
936
937 ttrace->paths.table = npath;
938 ttrace->paths.max = fd;
939 }
940
941 ttrace->paths.table[fd] = strdup(pathname);
942
943 return ttrace->paths.table[fd] != NULL ? 0 : -1;
944 }
945
/*
 * Resolve 'fd' to a pathname by reading its /proc symlink and cache the
 * result via trace__set_fd_pathname().
 *
 * Returns 0 on success, -1 if the link can't be stat'ed/read or its
 * target would not fit in PATH_MAX.
 */
static int thread__read_fd_path(struct thread *thread, int fd)
{
	char linkname[PATH_MAX], pathname[PATH_MAX];
	struct stat st;
	int ret;

	/* Group leaders use the short /proc/<pid>/fd/ form. */
	if (thread->pid_ == thread->tid) {
		scnprintf(linkname, sizeof(linkname),
			  "/proc/%d/fd/%d", thread->pid_, fd);
	} else {
		scnprintf(linkname, sizeof(linkname),
			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
	}

	/* For symlinks st_size is the target length; reject if it won't fit. */
	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
		return -1;

	ret = readlink(linkname, pathname, sizeof(pathname));

	/* The link may have changed between lstat() and readlink(). */
	if (ret < 0 || ret > st.st_size)
		return -1;

	pathname[ret] = '\0';	/* readlink() does not NUL-terminate */
	return trace__set_fd_pathname(thread, fd, pathname);
}
971
thread__fd_path(struct thread * thread,int fd,struct trace * trace)972 static const char *thread__fd_path(struct thread *thread, int fd,
973 struct trace *trace)
974 {
975 struct thread_trace *ttrace = thread__priv(thread);
976
977 if (ttrace == NULL)
978 return NULL;
979
980 if (fd < 0)
981 return NULL;
982
983 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
984 if (!trace->live)
985 return NULL;
986 ++trace->stats.proc_getname;
987 if (thread__read_fd_path(thread, fd))
988 return NULL;
989 }
990
991 return ttrace->paths.table[fd];
992 }
993
syscall_arg__scnprintf_fd(char * bf,size_t size,struct syscall_arg * arg)994 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
995 struct syscall_arg *arg)
996 {
997 int fd = arg->val;
998 size_t printed = scnprintf(bf, size, "%d", fd);
999 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1000
1001 if (path)
1002 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1003
1004 return printed;
1005 }
1006
syscall_arg__scnprintf_close_fd(char * bf,size_t size,struct syscall_arg * arg)1007 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1008 struct syscall_arg *arg)
1009 {
1010 int fd = arg->val;
1011 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1012 struct thread_trace *ttrace = thread__priv(arg->thread);
1013
1014 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1015 zfree(&ttrace->paths.table[fd]);
1016
1017 return printed;
1018 }
1019
thread__set_filename_pos(struct thread * thread,const char * bf,unsigned long ptr)1020 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1021 unsigned long ptr)
1022 {
1023 struct thread_trace *ttrace = thread__priv(thread);
1024
1025 ttrace->filename.ptr = ptr;
1026 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1027 }
1028
syscall_arg__scnprintf_filename(char * bf,size_t size,struct syscall_arg * arg)1029 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1030 struct syscall_arg *arg)
1031 {
1032 unsigned long ptr = arg->val;
1033
1034 if (!arg->trace->vfs_getname)
1035 return scnprintf(bf, size, "%#x", ptr);
1036
1037 thread__set_filename_pos(arg->thread, bf, ptr);
1038 return 0;
1039 }
1040
trace__filter_duration(struct trace * trace,double t)1041 static bool trace__filter_duration(struct trace *trace, double t)
1042 {
1043 return t < (trace->duration_filter * NSEC_PER_MSEC);
1044 }
1045
/* Print the timestamp as milliseconds relative to the session start. */
static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
{
	double rel_ms = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;

	return fprintf(fp, "%10.3f ", rel_ms);
}
1052
1053 /*
1054 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1055 * using ttrace->entry_time for a thread that receives a sys_exit without
1056 * first having received a sys_enter ("poll" issued before tracing session
1057 * starts, lost sys_enter exit due to ring buffer overflow).
1058 */
trace__fprintf_tstamp(struct trace * trace,u64 tstamp,FILE * fp)1059 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1060 {
1061 if (tstamp > 0)
1062 return __trace__fprintf_tstamp(trace, tstamp, fp);
1063
1064 return fprintf(fp, " ? ");
1065 }
1066
/*
 * Set from the signal handler, polled by the main event loop: both flags
 * must be volatile so the compiler cannot cache them in a register across
 * loop iterations.
 */
static volatile bool done = false;
static volatile bool interrupted = false;

static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;	/* SIGINT gets a summary, others don't */
}
1075
/*
 * Print the common line prefix: timestamp, duration and — when tracing
 * more than one thread — comm/tid of the thread the sample belongs to.
 */
static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
					u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
{
	size_t printed;

	printed  = trace__fprintf_tstamp(trace, tstamp, fp);
	printed += fprintf_duration(duration, duration_calculated, fp);

	if (!trace->multiple_threads)
		return printed;

	if (trace->show_comm)
		printed += fprintf(fp, "%.14s/", thread__comm_str(thread));

	return printed + fprintf(fp, "%d ", thread->tid);
}
1090
trace__process_event(struct trace * trace,struct machine * machine,union perf_event * event,struct perf_sample * sample)1091 static int trace__process_event(struct trace *trace, struct machine *machine,
1092 union perf_event *event, struct perf_sample *sample)
1093 {
1094 int ret = 0;
1095
1096 switch (event->header.type) {
1097 case PERF_RECORD_LOST:
1098 color_fprintf(trace->output, PERF_COLOR_RED,
1099 "LOST %" PRIu64 " events!\n", event->lost.lost);
1100 ret = machine__process_lost_event(machine, event, sample);
1101 break;
1102 default:
1103 ret = machine__process_event(machine, event, sample);
1104 break;
1105 }
1106
1107 return ret;
1108 }
1109
trace__tool_process(struct perf_tool * tool,union perf_event * event,struct perf_sample * sample,struct machine * machine)1110 static int trace__tool_process(struct perf_tool *tool,
1111 union perf_event *event,
1112 struct perf_sample *sample,
1113 struct machine *machine)
1114 {
1115 struct trace *trace = container_of(tool, struct trace, tool);
1116 return trace__process_event(trace, machine, event, sample);
1117 }
1118
trace__machine__resolve_kernel_addr(void * vmachine,unsigned long long * addrp,char ** modp)1119 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1120 {
1121 struct machine *machine = vmachine;
1122
1123 if (machine->kptr_restrict_warned)
1124 return NULL;
1125
1126 if (symbol_conf.kptr_restrict) {
1127 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1128 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1129 "Kernel samples will not be resolved.\n");
1130 machine->kptr_restrict_warned = true;
1131 return NULL;
1132 }
1133
1134 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1135 }
1136
trace__symbols_init(struct trace * trace,struct perf_evlist * evlist)1137 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1138 {
1139 int err = symbol__init(NULL);
1140
1141 if (err)
1142 return err;
1143
1144 trace->host = machine__new_host();
1145 if (trace->host == NULL)
1146 return -ENOMEM;
1147
1148 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
1149 return -errno;
1150
1151 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1152 evlist->threads, trace__tool_process, false,
1153 trace->opts.proc_map_timeout);
1154 if (err)
1155 symbol__exit();
1156
1157 return err;
1158 }
1159
/*
 * Pick a pretty-printer for each argument of syscall 'sc'.
 *
 * Precedence per argument: an explicit formatter from the syscall_fmt
 * table, then filename-looking "const char *" fields, then generic hex
 * for any pointer, then pid_t/umode_t, and finally int-ish fields whose
 * name ends in "fd" are shown as file descriptors.
 *
 * Returns 0 on success, -1 if the formatter array can't be allocated.
 */
static int syscall__set_arg_fmts(struct syscall *sc)
{
	struct format_field *field;
	int idx = 0, len;

	sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
	if (sc->arg_scnprintf == NULL)
		return -1;

	if (sc->fmt)
		sc->arg_parm = sc->fmt->arg_parm;

	for (field = sc->args; field; field = field->next) {
		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
		else if (strcmp(field->type, "const char *") == 0 &&
			 (strcmp(field->name, "filename") == 0 ||
			  strcmp(field->name, "path") == 0 ||
			  strcmp(field->name, "pathname") == 0))
			sc->arg_scnprintf[idx] = SCA_FILENAME;
		else if (field->flags & FIELD_IS_POINTER)
			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
		else if (strcmp(field->type, "pid_t") == 0)
			sc->arg_scnprintf[idx] = SCA_PID;
		else if (strcmp(field->type, "umode_t") == 0)
			sc->arg_scnprintf[idx] = SCA_MODE_T;
		else if ((strcmp(field->type, "int") == 0 ||
			  strcmp(field->type, "unsigned int") == 0 ||
			  strcmp(field->type, "long") == 0) &&
			 (len = strlen(field->name)) >= 2 &&
			 strcmp(field->name + len - 2, "fd") == 0) {
			/*
			 * /sys/kernel/tracing/events/syscalls/sys_enter*
			 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
			 * 65 int
			 * 23 unsigned int
			 * 7 unsigned long
			 */
			sc->arg_scnprintf[idx] = SCA_FD;
		}
		++idx;
	}

	return 0;
}
1205
/*
 * Lazily fill trace->syscalls.table[id] with the name, format and
 * argument pretty-printers for syscall 'id', growing the table as needed.
 *
 * Returns 0 on success, -1 if the syscall is unknown, allocation fails,
 * or neither its tracepoint nor its alias's tracepoint format can be read.
 */
static int trace__read_syscall_info(struct trace *trace, int id)
{
	char tp_name[128];
	struct syscall *sc;
	const char *name = syscalltbl__name(trace->sctbl, id);

	if (name == NULL)
		return -1;

	if (id > trace->syscalls.max) {
		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));

		if (nsyscalls == NULL)
			return -1;

		/* Zero only the slots we have not populated before. */
		if (trace->syscalls.max != -1) {
			memset(nsyscalls + trace->syscalls.max + 1, 0,
			       (id - trace->syscalls.max) * sizeof(*sc));
		} else {
			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
		}

		trace->syscalls.table = nsyscalls;
		trace->syscalls.max = id;
	}

	sc = trace->syscalls.table + id;
	sc->name = name;

	sc->fmt = syscall_fmt__find(sc->name);

	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
	sc->tp_format = trace_event__tp_format("syscalls", tp_name);

	/* Some syscalls are traced under an alias (e.g. renamed entry points). */
	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
	}

	if (IS_ERR(sc->tp_format))
		return -1;

	sc->args = sc->tp_format->format.fields;
	sc->nr_args = sc->tp_format->format.nr_fields;
	/*
	 * The first field, '__syscall_nr' or 'nr' (older kernels), is the
	 * syscall number itself, not an argument — skip it when present.
	 */
	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
		sc->args = sc->args->next;
		--sc->nr_args;
	}

	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");

	return syscall__set_arg_fmts(sc);
}
1264
/*
 * Translate the -e/--expr syscall name list (trace->ev_qualifier) into
 * syscall ids in trace->ev_qualifier_ids, reporting every name that is
 * not a known syscall on this machine.
 *
 * Returns 0 on success, -EINVAL on allocation failure or any bad name
 * (in which case the ids array is freed again).
 */
static int trace__validate_ev_qualifier(struct trace *trace)
{
	int err = 0, i;
	struct str_node *pos;

	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
						 sizeof(trace->ev_qualifier_ids.entries[0]));

	if (trace->ev_qualifier_ids.entries == NULL) {
		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
		      trace->output);
		err = -EINVAL;
		goto out;
	}

	i = 0;

	strlist__for_each_entry(pos, trace->ev_qualifier) {
		const char *sc = pos->s;
		int id = syscalltbl__id(trace->sctbl, sc);

		if (id < 0) {
			/* First bad name opens the error message, later ones just append. */
			if (err == 0) {
				fputs("Error:\tInvalid syscall ", trace->output);
				err = -EINVAL;
			} else {
				fputs(", ", trace->output);
			}

			fputs(sc, trace->output);
		}

		/* Invalid ids land here too, but the array is discarded below. */
		trace->ev_qualifier_ids.entries[i++] = id;
	}

	if (err < 0) {
		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
		      "\nHint:\tand: 'man syscalls'\n", trace->output);
		zfree(&trace->ev_qualifier_ids.entries);
		trace->ev_qualifier_ids.nr = 0;
	}
out:
	return err;
}
1310
/*
 * args is to be interpreted as a series of longs but we need to handle
 * 8-byte unaligned accesses. args points to raw_data within the event
 * and raw_data is guaranteed to be 8-byte unaligned because it is
 * preceded by raw_size which is a u32. So we need to copy args to a temp
 * variable to read it. Most notably this avoids extended load instructions
 * on unaligned addresses.
 */

/*
 * Format the argument list of syscall 'sc' into 'bf' using the per-arg
 * pretty-printers chosen by syscall__set_arg_fmts(). Returns the number
 * of characters written.
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned char *args, struct trace *trace,
				      struct thread *thread)
{
	size_t printed = 0;
	unsigned char *p;
	unsigned long val;

	if (sc->args != NULL) {
		struct format_field *field;
		u8 bit = 1;
		struct syscall_arg arg = {
			.idx	= 0,
			.mask	= 0,
			.trace  = trace,
			.thread = thread,
		};

		for (field = sc->args; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			/* A formatter may consume several args via arg.mask. */
			if (arg.mask & bit)
				continue;

			/* special care for unaligned accesses */
			p = args + sizeof(unsigned long) * arg.idx;
			memcpy(&val, p, sizeof(val));

			/*
			 * Suppress this argument if its value is zero and
			 * we don't have a string associated in an
			 * strarray for it.
			 */
			if (val == 0 &&
			    !(sc->arg_scnprintf &&
			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
			      sc->arg_parm[arg.idx]))
				continue;

			printed += scnprintf(bf + printed, size - printed,
					     "%s%s: ", printed ? ", " : "", field->name);
			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
				arg.val = val;
				if (sc->arg_parm)
					arg.parm = sc->arg_parm[arg.idx];
				printed += sc->arg_scnprintf[arg.idx](bf + printed,
								      size - printed, &arg);
			} else {
				printed += scnprintf(bf + printed, size - printed,
						     "%ld", val);
			}
		}
	} else if (IS_ERR(sc->tp_format)) {
		/*
		 * If we managed to read the tracepoint /format file, then we
		 * may end up not having any args, like with gettid(), so only
		 * print the raw args when we didn't manage to read it.
		 */
		int i = 0;

		while (i < 6) {
			/* special care for unaligned accesses */
			p = args + sizeof(unsigned long) * i;
			memcpy(&val, p, sizeof(val));
			printed += scnprintf(bf + printed, size - printed,
					     "%sarg%d: %ld",
					     printed ? ", " : "", i, val);
			++i;
		}
	}

	return printed;
}
1392
/* Signature shared by the per-tracepoint sample handlers below. */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1396
/*
 * Return the (lazily loaded) syscall descriptor for 'id', or NULL if the
 * id is negative or its info can't be read — complaining under -v.
 */
static struct syscall *trace__syscall_info(struct trace *trace,
					   struct perf_evsel *evsel, int id)
{

	if (id < 0) {

		/*
		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
		 * before that, leaving at a higher verbosity level till that is
		 * explained. Reproduced with plain ftrace with:
		 *
		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
		 * grep "NR -1 " /t/trace_pipe
		 *
		 * After generating some load on the machine.
		 */
		if (verbose > 1) {
			static u64 n;
			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
				id, perf_evsel__name(evsel), ++n);
		}
		return NULL;
	}

	/* First sight of this id: pull in its tracepoint format and formatters. */
	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
	    trace__read_syscall_info(trace, id))
		goto out_cant_read;

	/* Re-check: trace__read_syscall_info() may have failed part-way. */
	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
		goto out_cant_read;

	return &trace->syscalls.table[id];

out_cant_read:
	if (verbose) {
		fprintf(trace->output, "Problems reading syscall %d", id);
		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
		fputs(" information\n", trace->output);
	}
	return NULL;
}
1439
thread__update_stats(struct thread_trace * ttrace,int id,struct perf_sample * sample)1440 static void thread__update_stats(struct thread_trace *ttrace,
1441 int id, struct perf_sample *sample)
1442 {
1443 struct int_node *inode;
1444 struct stats *stats;
1445 u64 duration = 0;
1446
1447 inode = intlist__findnew(ttrace->syscall_stats, id);
1448 if (inode == NULL)
1449 return;
1450
1451 stats = inode->priv;
1452 if (stats == NULL) {
1453 stats = malloc(sizeof(struct stats));
1454 if (stats == NULL)
1455 return;
1456 init_stats(stats);
1457 inode->priv = stats;
1458 }
1459
1460 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1461 duration = sample->time - ttrace->entry_time;
1462
1463 update_stats(stats, duration);
1464 }
1465
trace__printf_interrupted_entry(struct trace * trace,struct perf_sample * sample)1466 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1467 {
1468 struct thread_trace *ttrace;
1469 u64 duration;
1470 size_t printed;
1471
1472 if (trace->current == NULL)
1473 return 0;
1474
1475 ttrace = thread__priv(trace->current);
1476
1477 if (!ttrace->entry_pending)
1478 return 0;
1479
1480 duration = sample->time - ttrace->entry_time;
1481
1482 printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
1483 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1484 ttrace->entry_pending = false;
1485
1486 return printed;
1487 }
1488
/*
 * raw_syscalls:sys_enter handler: build the "name(args" part of the line
 * in ttrace->entry_str. For most syscalls printing is deferred to
 * trace__sys_exit() so the return value lands on the same line; syscalls
 * that never return (exit/exit_group) are printed right away.
 */
static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample)
{
	char *msg;
	void *args;
	size_t printed = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);

	/* Lazily allocate the buffer where the entry line is built. */
	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(trace__entry_str_size);
		if (!ttrace->entry_str)
			goto out_put;
	}

	/* Close any still-open line from another thread, unless filtering. */
	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
		trace__printf_interrupted_entry(trace, sample);

	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
					   args, trace, thread);

	if (sc->is_exit) {
		/* exit/exit_group never return: print now, there is no sys_exit. */
		if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
			trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
			fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
		}
	} else {
		ttrace->entry_pending = true;
		/* See trace__vfs_getname & trace__sys_exit */
		ttrace->filename.pending_open = false;
	}

	/* Track the thread currently "owning" the output line. */
	if (trace->current != thread) {
		thread__put(trace->current);
		trace->current = thread__get(thread);
	}
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1547
trace__resolve_callchain(struct trace * trace,struct perf_evsel * evsel,struct perf_sample * sample,struct callchain_cursor * cursor)1548 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1549 struct perf_sample *sample,
1550 struct callchain_cursor *cursor)
1551 {
1552 struct addr_location al;
1553
1554 if (machine__resolve(trace->host, &al, sample) < 0 ||
1555 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1556 return -1;
1557
1558 return 0;
1559 }
1560
trace__fprintf_callchain(struct trace * trace,struct perf_sample * sample)1561 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1562 {
1563 /* TODO: user-configurable print_opts */
1564 const unsigned int print_opts = EVSEL__PRINT_SYM |
1565 EVSEL__PRINT_DSO |
1566 EVSEL__PRINT_UNKNOWN_AS_ADDR;
1567
1568 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
1569 }
1570
/*
 * raw_syscalls:sys_exit handler: finish the line started by
 * trace__sys_enter() (or print a "continued" stub if the entry was lost
 * or already flushed), then format the return value per the syscall's
 * fmt flags (errno name, timeout, hex, child pid).
 */
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   union perf_event *event __maybe_unused,
			   struct perf_sample *sample)
{
	long ret;
	u64 duration = 0;
	bool duration_calculated = false;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (trace->summary)
		thread__update_stats(ttrace, id, sample);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	/* A successful open: tie the vfs_getname'd path to the returned fd. */
	if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
		ttrace->filename.pending_open = false;
		++trace->stats.vfs_getname;
	}

	ttrace->exit_time = sample->time;

	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;
		duration_calculated = true;
	} else if (trace->duration_filter)
		goto out;	/* can't compute a duration, so can't pass the filter */

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			if (callchain_cursor.nr < trace->min_stack)
				goto out;	/* stack too shallow for --min-stack */
			callchain_ret = 1;
		}
	}

	if (trace->summary_only)
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);

	if (ttrace->entry_pending) {
		fprintf(trace->output, "%-70s", ttrace->entry_str);
	} else {
		/* The entry line was lost or flushed as "...": mark continuation. */
		fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		fprintf(trace->output, "]: %s()", sc->name);
	}

	if (sc->fmt == NULL) {
signed_print:
		fprintf(trace->output, ") = %ld", ret);
	} else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
		/* Negative return: show it as "-1 ENAME (message)". */
		char bf[STRERR_BUFSIZE];
		const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
			   *e = audit_errno_to_name(-ret);

		fprintf(trace->output, ") = -1 %s %s", e, emsg);
	} else if (ret == 0 && sc->fmt->timeout)
		fprintf(trace->output, ") = 0 Timeout");
	else if (sc->fmt->hexret)
		fprintf(trace->output, ") = %#lx", ret);
	else if (sc->fmt->errpid) {
		/* Return value is a pid (fork/clone/wait*): show its comm. */
		struct thread *child = machine__find_thread(trace->host, ret, ret);

		if (child != NULL) {
			fprintf(trace->output, ") = %ld", ret);
			if (child->comm_set)
				fprintf(trace->output, " (%s)", thread__comm_str(child));
			thread__put(child);
		}
	} else
		goto signed_print;

	fputc('\n', trace->output);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	ttrace->entry_pending = false;
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1672
/*
 * probe:vfs_getname handler: cache the resolved pathname for the open()
 * return path (filename.pending_open) and, when a syscall entry line is
 * waiting on this pointer, splice the name into ttrace->entry_str at the
 * position recorded by thread__set_filename_pos().
 */
static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	struct thread_trace *ttrace;
	size_t filename_len, entry_str_len, to_move;
	ssize_t remaining_space;
	char *pos;
	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");

	if (!thread)
		goto out;

	ttrace = thread__priv(thread);
	if (!ttrace)
		goto out;

	filename_len = strlen(filename);

	/* Grow the cached-name buffer if this name is longer than before. */
	if (ttrace->filename.namelen < filename_len) {
		char *f = realloc(ttrace->filename.name, filename_len + 1);

		if (f == NULL)
			goto out;

		ttrace->filename.namelen = filename_len;
		ttrace->filename.name = f;
	}

	strcpy(ttrace->filename.name, filename);
	ttrace->filename.pending_open = true;

	/* No entry line is waiting for this name: nothing to splice. */
	if (!ttrace->filename.ptr)
		goto out;

	entry_str_len = strlen(ttrace->entry_str);
	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
	if (remaining_space <= 0)
		goto out;

	/* Keep the tail of the name if the whole thing doesn't fit. */
	if (filename_len > (size_t)remaining_space) {
		filename += filename_len - remaining_space;
		filename_len = remaining_space;
	}

	/* Open a gap at the splice position and copy the name into it. */
	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
	memmove(pos + filename_len, pos, to_move);
	memcpy(pos, filename, filename_len);

	ttrace->filename.ptr = 0;
	ttrace->filename.entry_str_pos = 0;
out:
	return 0;
}
1729
/*
 * sched:sched_stat_runtime handler: accumulate on-CPU time per thread
 * and globally (used by the summary). If the per-thread state can't be
 * allocated, dump the raw event fields instead of dropping it silently.
 */
static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
				     union perf_event *event __maybe_unused,
				     struct perf_sample *sample)
{
	u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
	struct thread *thread = machine__findnew_thread(trace->host,
							sample->pid,
							sample->tid);
	struct thread_trace *ttrace = thread__trace(thread, trace->output);

	if (ttrace == NULL)
		goto out_dump;

	ttrace->runtime_ms += runtime_ms;
	trace->runtime_ms += runtime_ms;
	thread__put(thread);
	return 0;

out_dump:
	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
		evsel->name,
		perf_evsel__strval(evsel, sample, "comm"),
		(pid_t)perf_evsel__intval(evsel, sample, "pid"),
		runtime,
		perf_evsel__intval(evsel, sample, "vruntime"));
	thread__put(thread);
	return 0;
}
1759
bpf_output__printer(enum binary_printer_ops op,unsigned int val,void * extra)1760 static void bpf_output__printer(enum binary_printer_ops op,
1761 unsigned int val, void *extra)
1762 {
1763 FILE *output = extra;
1764 unsigned char ch = (unsigned char)val;
1765
1766 switch (op) {
1767 case BINARY_PRINT_CHAR_DATA:
1768 fprintf(output, "%c", isprint(ch) ? ch : '.');
1769 break;
1770 case BINARY_PRINT_DATA_BEGIN:
1771 case BINARY_PRINT_LINE_BEGIN:
1772 case BINARY_PRINT_ADDR:
1773 case BINARY_PRINT_NUM_DATA:
1774 case BINARY_PRINT_NUM_PAD:
1775 case BINARY_PRINT_SEP:
1776 case BINARY_PRINT_CHAR_PAD:
1777 case BINARY_PRINT_LINE_END:
1778 case BINARY_PRINT_DATA_END:
1779 default:
1780 break;
1781 }
1782 }
1783
/* Dump a BPF output event's raw payload as characters, 8 bytes per line. */
static void bpf_output__fprintf(struct trace *trace,
				struct perf_sample *sample)
{
	print_binary(sample->raw_data, sample->raw_size, 8,
		     bpf_output__printer, trace->output);
}
1790
/*
 * Generic handler for --event tracepoints (anything that isn't a raw
 * syscall): print timestamp, event name and the tracepoint's own
 * formatted fields (or a character dump for bpf-output events).
 */
static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample)
{
	int callchain_ret = 0;

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			if (callchain_cursor.nr < trace->min_stack)
				goto out;	/* below --min-stack, skip entirely */
			callchain_ret = 1;
		}
	}

	trace__printf_interrupted_entry(trace, sample);
	trace__fprintf_tstamp(trace, sample->time, trace->output);

	/* Keep columns aligned with the syscall lines' duration field. */
	if (trace->trace_syscalls)
		fprintf(trace->output, "(         ): ");

	fprintf(trace->output, "%s:", evsel->name);

	if (perf_evsel__is_bpf_output(evsel)) {
		bpf_output__fprintf(trace, sample);
	} else if (evsel->tp_format) {
		event_format__fprintf(evsel->tp_format, sample->cpu,
				      sample->raw_data, sample->raw_size,
				      trace->output);
	}

	fprintf(trace->output, ")\n");

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	return 0;
}
1831
print_location(FILE * f,struct perf_sample * sample,struct addr_location * al,bool print_dso,bool print_sym)1832 static void print_location(FILE *f, struct perf_sample *sample,
1833 struct addr_location *al,
1834 bool print_dso, bool print_sym)
1835 {
1836
1837 if ((verbose || print_dso) && al->map)
1838 fprintf(f, "%s@", al->map->dso->long_name);
1839
1840 if ((verbose || print_sym) && al->sym)
1841 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1842 al->addr - al->sym->start);
1843 else if (al->map)
1844 fprintf(f, "0x%" PRIx64, al->addr);
1845 else
1846 fprintf(f, "0x%" PRIx64, sample->addr);
1847 }
1848
/*
 * Page-fault software event handler: count major/minor faults per
 * thread and print "maj/minfault [code location] => data location".
 */
static int trace__pgfault(struct trace *trace,
			  struct perf_evsel *evsel,
			  union perf_event *event __maybe_unused,
			  struct perf_sample *sample)
{
	struct thread *thread;
	struct addr_location al;
	char map_type = 'd';	/* 'd'ata, 'x' executable, '?' unmapped */
	struct thread_trace *ttrace;
	int err = -1;
	int callchain_ret = 0;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			if (callchain_cursor.nr < trace->min_stack)
				goto out_put;	/* below --min-stack, skip */
			callchain_ret = 1;
		}
	}

	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
		ttrace->pfmaj++;
	else
		ttrace->pfmin++;

	if (trace->summary_only)
		goto out;

	/* Where the faulting instruction lives. */
	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
				   sample->ip, &al);

	trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);

	fprintf(trace->output, "%sfault [",
		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
		"maj" : "min");

	print_location(trace->output, sample, &al, false, true);

	fprintf(trace->output, "] => ");

	/* The faulting data address: try data maps first, then code. */
	thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
				   sample->addr, &al);

	if (!al.map) {
		thread__find_addr_location(thread, sample->cpumode,
					   MAP__FUNCTION, sample->addr, &al);

		if (al.map)
			map_type = 'x';
		else
			map_type = '?';
	}

	print_location(trace->output, sample, &al, true, false);

	fprintf(trace->output, " (%c%c)\n", map_type, al.level);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1924
skip_sample(struct trace * trace,struct perf_sample * sample)1925 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1926 {
1927 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1928 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1929 return false;
1930
1931 if (trace->pid_list || trace->tid_list)
1932 return true;
1933
1934 return false;
1935 }
1936
trace__set_base_time(struct trace * trace,struct perf_evsel * evsel,struct perf_sample * sample)1937 static void trace__set_base_time(struct trace *trace,
1938 struct perf_evsel *evsel,
1939 struct perf_sample *sample)
1940 {
1941 /*
1942 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1943 * and don't use sample->time unconditionally, we may end up having
1944 * some other event in the future without PERF_SAMPLE_TIME for good
1945 * reason, i.e. we may not be interested in its timestamps, just in
1946 * it taking place, picking some piece of information when it
1947 * appears in our event stream (vfs_getname comes to mind).
1948 */
1949 if (trace->base_time == 0 && !trace->full_time &&
1950 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
1951 trace->base_time = sample->time;
1952 }
1953
/*
 * perf_tool->sample callback used in replay mode: route each sample to
 * the handler attached to its evsel, honouring the --pid/--tid filters
 * and accounting it in trace->nr_events.
 */
static int trace__process_sample(struct perf_tool *tool,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_evsel *evsel,
				 struct machine *machine __maybe_unused)
{
	struct trace *trace = container_of(tool, struct trace, tool);
	tracepoint_handler handler = evsel->handler;

	if (skip_sample(trace, sample))
		return 0;

	trace__set_base_time(trace, evsel, sample);

	if (handler != NULL) {
		++trace->nr_events;
		handler(trace, evsel, event, sample);
	}

	return 0;
}
1977
parse_target_str(struct trace * trace)1978 static int parse_target_str(struct trace *trace)
1979 {
1980 if (trace->opts.target.pid) {
1981 trace->pid_list = intlist__new(trace->opts.target.pid);
1982 if (trace->pid_list == NULL) {
1983 pr_err("Error parsing process id string\n");
1984 return -EINVAL;
1985 }
1986 }
1987
1988 if (trace->opts.target.tid) {
1989 trace->tid_list = intlist__new(trace->opts.target.tid);
1990 if (trace->tid_list == NULL) {
1991 pr_err("Error parsing thread id string\n");
1992 return -EINVAL;
1993 }
1994 }
1995
1996 return 0;
1997 }
1998
trace__record(struct trace * trace,int argc,const char ** argv)1999 static int trace__record(struct trace *trace, int argc, const char **argv)
2000 {
2001 unsigned int rec_argc, i, j;
2002 const char **rec_argv;
2003 const char * const record_args[] = {
2004 "record",
2005 "-R",
2006 "-m", "1024",
2007 "-c", "1",
2008 };
2009
2010 const char * const sc_args[] = { "-e", };
2011 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2012 const char * const majpf_args[] = { "-e", "major-faults" };
2013 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2014 const char * const minpf_args[] = { "-e", "minor-faults" };
2015 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2016
2017 /* +1 is for the event string below */
2018 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2019 majpf_args_nr + minpf_args_nr + argc;
2020 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2021
2022 if (rec_argv == NULL)
2023 return -ENOMEM;
2024
2025 j = 0;
2026 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2027 rec_argv[j++] = record_args[i];
2028
2029 if (trace->trace_syscalls) {
2030 for (i = 0; i < sc_args_nr; i++)
2031 rec_argv[j++] = sc_args[i];
2032
2033 /* event string may be different for older kernels - e.g., RHEL6 */
2034 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2035 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2036 else if (is_valid_tracepoint("syscalls:sys_enter"))
2037 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2038 else {
2039 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2040 return -1;
2041 }
2042 }
2043
2044 if (trace->trace_pgfaults & TRACE_PFMAJ)
2045 for (i = 0; i < majpf_args_nr; i++)
2046 rec_argv[j++] = majpf_args[i];
2047
2048 if (trace->trace_pgfaults & TRACE_PFMIN)
2049 for (i = 0; i < minpf_args_nr; i++)
2050 rec_argv[j++] = minpf_args[i];
2051
2052 for (i = 0; i < (unsigned int)argc; i++)
2053 rec_argv[j++] = argv[i];
2054
2055 return cmd_record(j, rec_argv, NULL);
2056 }
2057
2058 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2059
perf_evlist__add_vfs_getname(struct perf_evlist * evlist)2060 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2061 {
2062 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2063
2064 if (IS_ERR(evsel))
2065 return false;
2066
2067 if (perf_evsel__field(evsel, "pathname") == NULL) {
2068 perf_evsel__delete(evsel);
2069 return false;
2070 }
2071
2072 evsel->handler = trace__vfs_getname;
2073 perf_evlist__add(evlist, evsel);
2074 return true;
2075 }
2076
/*
 * Create an evsel for one of the software page fault counters
 * (PERF_COUNT_SW_PAGE_FAULTS_MAJ/MIN), sampling every fault and
 * capturing the faulted-on data address.
 */
static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
{
	struct perf_evsel *evsel;
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_SOFTWARE,
		.config		= config,
		.mmap_data	= 1,	/* needed to resolve data addresses */
		.sample_period	= 1,	/* sample every single fault */
	};

	event_attr_init(&attr);

	evsel = perf_evsel__new(&attr);
	if (evsel != NULL)
		evsel->handler = trace__pgfault;

	return evsel;
}
2096
/*
 * Dispatch one event read from the mmap'ed ring buffers: non-sample
 * records update the machine state, samples are routed to the handler
 * attached to their evsel (syscalls, page faults, vfs_getname, ...).
 */
static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
{
	const u32 type = event->header.type;
	struct perf_evsel *evsel;

	if (type != PERF_RECORD_SAMPLE) {
		trace__process_event(trace, trace->host, event, sample);
		return;
	}

	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
	if (evsel == NULL) {
		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
		return;
	}

	trace__set_base_time(trace, evsel, sample);

	/* A tracepoint sample without a raw payload can't be parsed: warn and skip. */
	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
	    sample->raw_data == NULL) {
		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
		       perf_evsel__name(evsel), sample->tid,
		       sample->cpu, sample->raw_size);
	} else {
		tracepoint_handler handler = evsel->handler;
		handler(trace, evsel, event, sample);
	}
}
2125
/*
 * Create the raw_syscalls:sys_enter/sys_exit tracepoint evsels, wire
 * their handlers and payload field accessors, and add both to the
 * evlist.  On success the evsels are also stashed in
 * trace->syscalls.events for later filter setup.  Returns 0 or -1.
 */
static int trace__add_syscall_newtp(struct trace *trace)
{
	int ret = -1;
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *sys_enter, *sys_exit;

	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
	if (sys_enter == NULL)
		goto out;

	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
		goto out_delete_sys_enter;

	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
	if (sys_exit == NULL)
		goto out_delete_sys_enter;

	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
		goto out_delete_sys_exit;

	perf_evlist__add(evlist, sys_enter);
	perf_evlist__add(evlist, sys_exit);

	if (callchain_param.enabled && !trace->kernel_syscallchains) {
		/*
		 * We're interested only in the user space callchain
		 * leading to the syscall, allow overriding that for
		 * debugging reasons using --kernel_syscall_callchains
		 */
		sys_exit->attr.exclude_callchain_kernel = 1;
	}

	trace->syscalls.events.sys_enter = sys_enter;
	trace->syscalls.events.sys_exit = sys_exit;

	ret = 0;
out:
	return ret;

/* Unwind in reverse order of acquisition (classic goto-cleanup ladder). */
out_delete_sys_exit:
	perf_evsel__delete_priv(sys_exit);
out_delete_sys_enter:
	perf_evsel__delete_priv(sys_enter);
	goto out;
}
2171
/*
 * Install the -e/--expr syscall qualifier as a tracepoint filter
 * ("id == A || id == B ...", or the negated form) on both the
 * sys_enter and sys_exit evsels.  Returns 0 on success, -1 on failure
 * (errno set to ENOMEM when building the filter string failed).
 */
static int trace__set_ev_qualifier_filter(struct trace *trace)
{
	int err = -1;
	struct perf_evsel *sys_exit;
	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
						trace->ev_qualifier_ids.nr,
						trace->ev_qualifier_ids.entries);

	if (filter == NULL)
		goto out_enomem;

	/* Zero return == filter attached to sys_enter; mirror it on sys_exit. */
	if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
					  filter)) {
		sys_exit = trace->syscalls.events.sys_exit;
		err = perf_evsel__append_tp_filter(sys_exit, filter);
	}

	free(filter);
out:
	return err;
out_enomem:
	errno = ENOMEM;
	goto out;
}
2196
/*
 * Live tracing main loop: assemble the evlist (syscall tracepoints,
 * vfs_getname, page faults, sched_stat_runtime as requested), configure
 * callchains and filters, open/mmap the events, optionally fork the
 * workload, then consume ring buffer events until interrupted or the
 * workload finishes.  Prints the summary/tool stats on clean exit.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
	int err = -1, i;
	unsigned long before;
	const bool forks = argc > 0;	/* leftover args == workload to fork */
	bool draining = false;

	trace->live = true;

	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
		goto out_error_raw_syscalls;

	if (trace->trace_syscalls)
		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);

	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
		pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
		if (pgfault_maj == NULL)
			goto out_error_mem;
		perf_evlist__add(evlist, pgfault_maj);
	}

	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
		pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
		if (pgfault_min == NULL)
			goto out_error_mem;
		perf_evlist__add(evlist, pgfault_min);
	}

	if (trace->sched &&
	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
				   trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	perf_evlist__config(evlist, &trace->opts, NULL);

	if (callchain_param.enabled) {
		bool use_identifier = false;

		if (trace->syscalls.events.sys_exit) {
			perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
						     &trace->opts, &callchain_param);
			use_identifier = true;
		}

		if (pgfault_maj) {
			perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
			use_identifier = true;
		}

		if (pgfault_min) {
			perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
			use_identifier = true;
		}

		if (use_identifier) {
		       /*
			* Now we have evsels with different sample_ids, use
			* PERF_SAMPLE_IDENTIFIER to map from sample to evsel
			* from a fixed position in each ring buffer record.
			*
			* As of this the changeset introducing this comment, this
			* isn't strictly needed, as the fields that can come before
			* PERF_SAMPLE_ID are all used, but we'll probably disable
			* some of those for things like copying the payload of
			* pointer syscall arguments, and for vfs_getname we don't
			* need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
			* here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
			*/
			perf_evlist__set_sample_bit(evlist, IDENTIFIER);
			perf_evlist__reset_sample_bit(evlist, ID);
		}
	}

	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

	err = perf_evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_error_open;
	}

	/*
	 * Better not use !target__has_task() here because we need to cover the
	 * case where no threads were specified in the command line, but a
	 * workload was, and in that case we will fill in the thread_map when
	 * we fork the workload in perf_evlist__prepare_workload.
	 */
	if (trace->filter_pids.nr > 0)
		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
	else if (thread_map__pid(evlist->threads, 0) == -1)
		err = perf_evlist__set_filter_pid(evlist, getpid());

	if (err < 0)
		goto out_error_mem;

	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;

		pr_debug("event qualifier tracepoint filter: %s\n",
			 trace->syscalls.events.sys_exit->filter);
	}

	err = perf_evlist__apply_filters(evlist, &evsel);
	if (err < 0)
		goto out_error_apply_filters;

	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
	if (err < 0)
		goto out_error_mmap;

	if (!target__none(&trace->opts.target))
		perf_evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	/* Show tids in the output when more than one thread may be traced. */
	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
				  evlist->threads->nr > 1 ||
				  perf_evlist__first(evlist)->attr.inherit;
again:
	before = trace->nr_events;

	/* Drain every ring buffer; 'draining' means we're flushing after 'done'. */
	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;

		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
			struct perf_sample sample;

			++trace->nr_events;

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
				goto next_event;
			}

			trace__handle_event(trace, event, &sample);
next_event:
			perf_evlist__mmap_consume(evlist, i);

			if (interrupted)
				goto out_disable;

			if (done && !draining) {
				perf_evlist__disable(evlist);
				draining = true;
			}
		}
	}

	/* Nothing new this pass: poll (bounded once 'done') before looping again. */
	if (trace->nr_events == before) {
		int timeout = done ? 100 : -1;

		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
				draining = true;

			goto again;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	perf_evlist__disable(evlist);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	perf_evlist__delete(evlist);
	trace->evlist = NULL;
	trace->live = false;
	return err;
/*
 * Unreachable fall-through: this brace-scope only exists so the error
 * labels below can share one errbuf; each label formats a message and
 * jumps back to out_delete_evlist via out_error.
 */
{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, perf_evsel__name(evsel), errno,
		str_error_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}
2455
/*
 * 'perf trace -i file': replay a previously recorded perf.data file,
 * wiring the same sys_enter/sys_exit/page fault handlers used for live
 * tracing, then let the session machinery feed events through
 * trace__process_sample().
 */
static int trace__replay(struct trace *trace)
{
	const struct perf_evsel_str_handler handlers[] = {
		{ "probe:vfs_getname", trace__vfs_getname, },
	};
	struct perf_data_file file = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = trace->force,
	};
	struct perf_session *session;
	struct perf_evsel *evsel;
	int err = -1;

	trace->tool.sample	  = trace__process_sample;
	trace->tool.mmap	  = perf_event__process_mmap;
	trace->tool.mmap2	  = perf_event__process_mmap2;
	trace->tool.comm	  = perf_event__process_comm;
	trace->tool.exit	  = perf_event__process_exit;
	trace->tool.fork	  = perf_event__process_fork;
	trace->tool.attr	  = perf_event__process_attr;
	trace->tool.tracing_data = perf_event__process_tracing_data;
	trace->tool.build_id	  = perf_event__process_build_id;

	trace->tool.ordered_events = true;
	trace->tool.ordering_requires_timestamps = true;

	/* add tid to output */
	trace->multiple_threads = true;

	session = perf_session__new(&file, false, &trace->tool);
	if (session == NULL)
		return -1;

	if (symbol__init(&session->header.env) < 0)
		goto out;

	trace->host = &session->machines.host;

	err = perf_session__set_tracepoints_handlers(session, handlers);
	if (err)
		goto out;

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_enter");
	/* older kernels have syscalls tp versus raw_syscalls */
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_enter");

	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
		goto out;
	}

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_exit");
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_exit");
	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
		goto out;
	}

	/* Route any recorded software page fault events to our handler too. */
	evlist__for_each_entry(session->evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
			evsel->handler = trace__pgfault;
	}

	err = parse_target_str(trace);
	if (err != 0)
		goto out;

	setup_pager();

	err = perf_session__process_events(session);
	if (err)
		pr_err("Failed to process events, error %d", err);

	else if (trace->summary)
		trace__fprintf_thread_summary(trace, trace->output);

out:
	perf_session__delete(session);

	return err;
}
2551
/* Emit the header line that precedes the per-thread summaries. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2560
/*
 * Re-sort the per-thread syscall stats (an intlist keyed by syscall id,
 * with 'struct stats' hanging off ->priv) by total milliseconds spent.
 * The body below fills one sorted-tree entry from an intlist node.
 */
DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
	struct stats *stats;
	double msecs;
	int syscall;
)
{
	struct int_node *source = rb_entry(nd, struct int_node, rb_node);
	struct stats *stats = source->priv;

	entry->syscall = source->i;
	entry->stats = stats;
	/* total time = nr of calls * average duration, scaled to msecs */
	entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
}
2574
/*
 * Print one thread's per-syscall statistics table (calls, total, min,
 * avg, max, stddev), ordered via the syscall_stats resort tree above.
 * Returns the number of characters written.
 */
static size_t thread__dump_stats(struct thread_trace *ttrace,
				 struct trace *trace, FILE *fp)
{
	size_t printed = 0;
	struct syscall *sc;
	struct rb_node *nd;
	DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);

	if (syscall_stats == NULL)
		return 0;

	printed += fprintf(fp, "\n");

	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");

	resort_rb__for_each_entry(nd, syscall_stats) {
		struct stats *stats = syscall_stats_entry->stats;
		if (stats) {
			/* stats are accumulated in nanoseconds; display in msecs */
			double min = (double)(stats->min) / NSEC_PER_MSEC;
			double max = (double)(stats->max) / NSEC_PER_MSEC;
			double avg = avg_stats(stats);
			double pct;
			u64 n = (u64) stats->n;

			/* relative stddev as a percentage of the average */
			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
			avg /= NSEC_PER_MSEC;

			sc = &trace->syscalls.table[syscall_stats_entry->syscall];
			printed += fprintf(fp, "   %-15s", sc->name);
			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
					   n, syscall_stats_entry->msecs, min, avg);
			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
		}
	}

	resort_rb__delete(syscall_stats);
	printed += fprintf(fp, "\n\n");

	return printed;
}
2617
trace__fprintf_thread(FILE * fp,struct thread * thread,struct trace * trace)2618 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
2619 {
2620 size_t printed = 0;
2621 struct thread_trace *ttrace = thread__priv(thread);
2622 double ratio;
2623
2624 if (ttrace == NULL)
2625 return 0;
2626
2627 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2628
2629 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2630 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2631 printed += fprintf(fp, "%.1f%%", ratio);
2632 if (ttrace->pfmaj)
2633 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2634 if (ttrace->pfmin)
2635 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2636 if (trace->sched)
2637 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2638 else if (fputc('\n', fp) != EOF)
2639 ++printed;
2640
2641 printed += thread__dump_stats(ttrace, trace, fp);
2642
2643 return printed;
2644 }
2645
thread__nr_events(struct thread_trace * ttrace)2646 static unsigned long thread__nr_events(struct thread_trace *ttrace)
2647 {
2648 return ttrace ? ttrace->nr_events : 0;
2649 }
2650
/*
 * Re-sort the machine's threads rb tree by per-thread event count for
 * the end-of-run summary; the body fills one sorted-tree entry per
 * thread node.
 */
DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
	struct thread *thread;
)
{
	entry->thread = rb_entry(nd, struct thread, rb_node);
}
2657
/*
 * Print the whole end-of-run summary: header plus one block per thread,
 * with threads ordered by event count via the 'threads' resort tree.
 * Returns the number of characters written (0 on sort failure).
 */
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
{
	DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
	size_t printed = trace__fprintf_threads_header(fp);
	struct rb_node *nd;

	if (threads == NULL) {
		fprintf(fp, "%s", "Error sorting output by nr_events!\n");
		return 0;
	}

	resort_rb__for_each_entry(nd, threads)
		printed += trace__fprintf_thread(fp, threads_entry->thread, trace);

	resort_rb__delete(threads);

	return printed;
}
2676
/*
 * --duration option callback: only show events lasting more than N.M ms.
 * NOTE(review): atof() silently yields 0.0 for unparseable input, so a
 * bad value disables the filter rather than reporting an error.
 */
static int trace__set_duration(const struct option *opt, const char *str,
			       int unset __maybe_unused)
{
	struct trace *trace = opt->value;

	trace->duration_filter = atof(str);
	return 0;
}
2685
/*
 * --filter-pids option callback: parse a CSV list of pids the kernel
 * should filter out, always reserving slot 0 for our own pid so perf
 * doesn't trace itself.  Returns 0 on success, -1 on failure.
 */
static int trace__set_filter_pids(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	int ret = -1;
	size_t i;
	struct trace *trace = opt->value;
	/*
	 * FIXME: introduce a intarray class, plain parse csv and create a
	 * { int nr, int entries[] } struct...
	 */
	struct intlist *list = intlist__new(str);

	if (list == NULL)
		return -1;

	/* +1: slot 0 carries getpid() below. */
	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
	trace->filter_pids.entries = calloc(i, sizeof(pid_t));

	if (trace->filter_pids.entries == NULL) {
		trace->filter_pids.nr = 0;	/* don't leave nr set with no entries */
		goto out;
	}

	trace->filter_pids.entries[0] = getpid();

	for (i = 1; i < trace->filter_pids.nr; ++i)
		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;

	ret = 0;
out:
	intlist__delete(list);	/* fix: was leaked when calloc() failed */
	return ret;
}
2717
trace__open_output(struct trace * trace,const char * filename)2718 static int trace__open_output(struct trace *trace, const char *filename)
2719 {
2720 struct stat st;
2721
2722 if (!stat(filename, &st) && st.st_size) {
2723 char oldname[PATH_MAX];
2724
2725 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2726 unlink(oldname);
2727 rename(filename, oldname);
2728 }
2729
2730 trace->output = fopen(filename, "w");
2731
2732 return trace->output == NULL ? -errno : 0;
2733 }
2734
/*
 * -F/--pf option callback: translate "all"/"maj"/"min" into the
 * TRACE_PFMAJ/TRACE_PFMIN bits, OR-ing into the existing mask.
 * Returns -1 on an unrecognized value.
 */
static int parse_pagefaults(const struct option *opt, const char *str,
			    int unset __maybe_unused)
{
	int *trace_pgfaults = opt->value;
	int mask;

	if (strcmp(str, "all") == 0)
		mask = TRACE_PFMAJ | TRACE_PFMIN;
	else if (strcmp(str, "maj") == 0)
		mask = TRACE_PFMAJ;
	else if (strcmp(str, "min") == 0)
		mask = TRACE_PFMIN;
	else
		return -1;

	*trace_pgfaults |= mask;
	return 0;
}
2751
/* Attach the same sample handler to every evsel in the list. */
static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		evsel->handler = handler;
}
2759
cmd_trace(int argc,const char ** argv,const char * prefix __maybe_unused)2760 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2761 {
2762 const char *trace_usage[] = {
2763 "perf trace [<options>] [<command>]",
2764 "perf trace [<options>] -- <command> [<options>]",
2765 "perf trace record [<options>] [<command>]",
2766 "perf trace record [<options>] -- <command> [<options>]",
2767 NULL
2768 };
2769 struct trace trace = {
2770 .syscalls = {
2771 . max = -1,
2772 },
2773 .opts = {
2774 .target = {
2775 .uid = UINT_MAX,
2776 .uses_mmap = true,
2777 },
2778 .user_freq = UINT_MAX,
2779 .user_interval = ULLONG_MAX,
2780 .no_buffering = true,
2781 .mmap_pages = UINT_MAX,
2782 .proc_map_timeout = 500,
2783 },
2784 .output = stderr,
2785 .show_comm = true,
2786 .trace_syscalls = true,
2787 .kernel_syscallchains = false,
2788 .max_stack = UINT_MAX,
2789 };
2790 const char *output_name = NULL;
2791 const char *ev_qualifier_str = NULL;
2792 const struct option trace_options[] = {
2793 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2794 "event selector. use 'perf list' to list available events",
2795 parse_events_option),
2796 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2797 "show the thread COMM next to its id"),
2798 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2799 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
2800 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2801 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2802 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2803 "trace events on existing process id"),
2804 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2805 "trace events on existing thread id"),
2806 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2807 "pids to filter (by the kernel)", trace__set_filter_pids),
2808 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2809 "system-wide collection from all CPUs"),
2810	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2811		    "list of cpus to monitor"),
2812	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2813		    "child tasks do not inherit counters"),
2814	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2815		     "number of mmap data pages",
2816		     perf_evlist__parse_mmap_pages),
2817	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2818		   "user to profile"),
2819	OPT_CALLBACK(0, "duration", &trace, "float",
2820		     "show only events with duration > N.M ms",
2821		     trace__set_duration),
2822	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2823	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2824	OPT_BOOLEAN('T', "time", &trace.full_time,
2825		    "Show full timestamp, not time relative to first start"),
2826	OPT_BOOLEAN('s', "summary", &trace.summary_only,
2827		    "Show only syscall summary with statistics"),
2828	OPT_BOOLEAN('S', "with-summary", &trace.summary,
2829		    "Show all syscalls and summary with statistics"),
2830	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2831		     "Trace pagefaults", parse_pagefaults, "maj"),
2832	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2833	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2834	OPT_CALLBACK(0, "call-graph", &trace.opts,
2835		     "record_mode[,record_size]", record_callchain_help,
2836		     &record_parse_callchain_opt),
2837	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2838		    "Show the kernel callchains on the syscall exit path"),
2839	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2840		     "Set the minimum stack depth when parsing the callchain, "
2841		     "anything below the specified depth will be ignored."),
2842	OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2843		     "Set the maximum stack depth when parsing the callchain, "
2844		     "anything beyond the specified depth will be ignored. "
2845		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
2846	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2847		     "per thread proc mmap processing timeout in ms"),
2848	OPT_END()
2849	};
	/*
	 * "user set" flags default to true and are cleared below when the
	 * corresponding option still holds its UINT_MAX "unset" sentinel
	 * after option parsing.
	 */
2850	bool __maybe_unused max_stack_user_set = true;
2851	bool mmap_pages_user_set = true;
2852	const char * const trace_subcommands[] = { "record", NULL };
2853	int err;
2854	char bf[BUFSIZ];	/* scratch buffer for strerror-style messages */
2855
	/* Dump a stack trace instead of dying silently on SIGSEGV/SIGFPE. */
2856	signal(SIGSEGV, sighandler_dump_stack);
2857	signal(SIGFPE, sighandler_dump_stack);
2858
2859	trace.evlist = perf_evlist__new();
2860	trace.sctbl = syscalltbl__new();
2861
2862	if (trace.evlist == NULL || trace.sctbl == NULL) {
2863		pr_err("Not enough memory to run!\n");
2864		err = -ENOMEM;
2865		goto out;
2866	}
2867
2868	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2869				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2870
	/*
	 * Wire up the special BPF stdout event, if any BPF programs were
	 * loaded via --event; on failure turn the error code into a
	 * human-readable message before bailing out.
	 */
2871	err = bpf__setup_stdout(trace.evlist);
2872	if (err) {
2873		bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2874		pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2875		goto out;
2876	}
2877
	/* Generic failure sentinel for the 'goto out'/'goto out_close' paths below. */
2878	err = -1;
2879
	/* Page fault tracing needs the faulting address and a timestamp. */
2880	if (trace.trace_pgfaults) {
2881		trace.opts.sample_address = true;
2882		trace.opts.sample_time = true;
2883	}
2884
	/* UINT_MAX means the user didn't pass -m/--mmap-pages. */
2885	if (trace.opts.mmap_pages == UINT_MAX)
2886		mmap_pages_user_set = false;
2887
	/*
	 * UINT_MAX means --max-stack wasn't given: when replaying from a
	 * file (input_name set) use the compile-time maximum, otherwise
	 * honour the current kernel.perf_event_max_stack sysctl.
	 */
2888	if (trace.max_stack == UINT_MAX) {
2889		trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
2890		max_stack_user_set = false;
2891	}
2892
	/*
	 * Asking for stack depths implies callchains: default to DWARF
	 * unwinding for syscall tracing when no --call-graph mode was
	 * explicitly requested (and the build supports it).
	 */
2893	#ifdef HAVE_DWARF_UNWIND_SUPPORT
2894	if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
2895		record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2896	#endif
2897
2898	if (callchain_param.enabled) {
		/*
		 * Callchains need more mmap space; bump the default for
		 * root, who isn't constrained by perf_event_mlock_kb.
		 */
2899		if (!mmap_pages_user_set && geteuid() == 0)
2900			trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2901
2902		symbol_conf.use_callchain = true;
2903	}
2904
	/* Events added via --event all get the generic trace event handler. */
2905	if (trace.evlist->nr_entries > 0)
2906		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2907
	/* 'perf trace record ...' delegates to perf record with syscall events. */
2908	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2909		return trace__record(&trace, argc-1, &argv[1]);
2910
2911	/* summary_only implies summary option, but don't overwrite summary if set */
2912	if (trace.summary_only)
2913		trace.summary = trace.summary_only;
2914
	/*
	 * Nothing left to trace: no syscalls, no page faults, no --event
	 * entries.  NOTE(review): this path uses 'return -1' rather than
	 * the 'goto out' convention used everywhere else here.
	 */
2915	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2916	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
2917		pr_err("Please specify something to trace.\n");
2918		return -1;
2919	}
2920
	/* -e filters syscalls; meaningless if syscall tracing is off. */
2921	if (!trace.trace_syscalls && ev_qualifier_str) {
2922		pr_err("The -e option can't be used with --no-syscalls.\n");
2923		goto out;
2924	}
2925
	/* Redirect all output to the -o file, if one was given. */
2926	if (output_name != NULL) {
2927		err = trace__open_output(&trace, output_name);
2928		if (err < 0) {
2929			perror("failed to create output file");
2930			goto out;
2931		}
2932	}
2933
	/* Cache the "open" syscall id; used when beautifying its args. */
2934	trace.open_id = syscalltbl__id(trace.sctbl, "open");
2935
	/*
	 * Parse the -e syscall qualifier list.  A leading '!' negates the
	 * set (trace everything except the named syscalls); names may also
	 * resolve to strace-like group files under STRACE_GROUPS_DIR.
	 */
2936	if (ev_qualifier_str != NULL) {
2937		const char *s = ev_qualifier_str;
2938		struct strlist_config slist_config = {
2939			.dirname = system_path(STRACE_GROUPS_DIR),
2940		};
2941
2942		trace.not_ev_qualifier = *s == '!';
2943		if (trace.not_ev_qualifier)
2944			++s;
2945		trace.ev_qualifier = strlist__new(s, &slist_config);
2946		if (trace.ev_qualifier == NULL) {
2947			fputs("Not enough memory to parse event qualifier",
2948			      trace.output);
2949			err = -ENOMEM;
2950			goto out_close;
2951		}
2952
2953		err = trace__validate_ev_qualifier(&trace);
2954		if (err)
2955			goto out_close;
2956	}
2957
	/* Check target options (--pid/--tid/--cpu/--uid combos) for sanity. */
2958	err = target__validate(&trace.opts.target);
2959	if (err) {
2960		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2961		fprintf(trace.output, "%s", bf);
2962		goto out_close;
2963	}
2964
2965	err = target__parse_uid(&trace.opts.target);
2966	if (err) {
2967		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2968		fprintf(trace.output, "%s", bf);
2969		goto out_close;
2970	}
2971
	/* No workload and no explicit target: trace the whole system. */
2972	if (!argc && target__none(&trace.opts.target))
2973		trace.opts.target.system_wide = true;
2974
	/* -i replays a recorded perf.data file; otherwise trace live. */
2975	if (input_name)
2976		err = trace__replay(&trace);
2977	else
2978		err = trace__run(&trace, argc, argv);
2979
2980	out_close:
	/* trace.output is only a private FILE when -o was used; don't close stdout. */
2981	if (output_name != NULL)
2982		fclose(trace.output);
2983	out:
2984	return err;
2985	}
2986