• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * builtin-trace.c
3  *
4  * Builtin 'trace' command:
5  *
6  * Display a continuously updated trace of any workload, CPU, specific PID,
7  * system wide, etc.  Default format is loosely strace like, but any other
8  * event may be specified using --event.
9  *
10  * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11  *
12  * Initially based on the 'trace' prototype by Thomas Gleixner:
13  *
14  * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15  *
16  * Released under the GPL v2. (and only v2, not any later version)
17  */
18 
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include "util/exec_cmd.h"
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include "util/parse-options.h"
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 
37 #include <libaudit.h>
38 #include <stdlib.h>
39 #include <sys/mman.h>
40 #include <linux/futex.h>
41 #include <linux/err.h>
42 
/*
 * Fallbacks for older distros: each constant below is only defined here when
 * the system headers did not already provide it.
 */

/* mmap() */
#ifndef MAP_STACK
#define MAP_STACK		0x20000
#endif

/* madvise() */
#ifndef MADV_HWPOISON
#define MADV_HWPOISON		100
#endif

#ifndef MADV_MERGEABLE
#define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
#define MADV_UNMERGEABLE	13
#endif

/* eventfd2() */
#ifndef EFD_SEMAPHORE
#define EFD_SEMAPHORE		1
#endif

#ifndef EFD_NONBLOCK
#define EFD_NONBLOCK		00004000
#endif

#ifndef EFD_CLOEXEC
#define EFD_CLOEXEC		02000000
#endif

/* open() */
#ifndef O_CLOEXEC
#define O_CLOEXEC		02000000
#endif

/* socket() */
#ifndef SOCK_DCCP
#define SOCK_DCCP		6
#endif

#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC		02000000
#endif

#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK		00004000
#endif

/* recvmsg() and friends */
#ifndef MSG_CMSG_CLOEXEC
#define MSG_CMSG_CLOEXEC	0x40000000
#endif

/* perf_event_open() */
#ifndef PERF_FLAG_FD_NO_GROUP
#define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
#endif

#ifndef PERF_FLAG_FD_OUTPUT
#define PERF_FLAG_FD_OUTPUT		(1UL << 1)
#endif

#ifndef PERF_FLAG_PID_CGROUP
#define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#ifndef PERF_FLAG_FD_CLOEXEC
#define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
#endif
108 
109 
110 struct tp_field {
111 	int offset;
112 	union {
113 		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
114 		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
115 	};
116 };
117 
118 #define TP_UINT_FIELD(bits) \
119 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
120 { \
121 	u##bits value; \
122 	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
123 	return value;  \
124 }
125 
126 TP_UINT_FIELD(8);
127 TP_UINT_FIELD(16);
128 TP_UINT_FIELD(32);
129 TP_UINT_FIELD(64);
130 
131 #define TP_UINT_FIELD__SWAPPED(bits) \
132 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
133 { \
134 	u##bits value; \
135 	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
136 	return bswap_##bits(value);\
137 }
138 
139 TP_UINT_FIELD__SWAPPED(16);
140 TP_UINT_FIELD__SWAPPED(32);
141 TP_UINT_FIELD__SWAPPED(64);
142 
tp_field__init_uint(struct tp_field * field,struct format_field * format_field,bool needs_swap)143 static int tp_field__init_uint(struct tp_field *field,
144 			       struct format_field *format_field,
145 			       bool needs_swap)
146 {
147 	field->offset = format_field->offset;
148 
149 	switch (format_field->size) {
150 	case 1:
151 		field->integer = tp_field__u8;
152 		break;
153 	case 2:
154 		field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
155 		break;
156 	case 4:
157 		field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
158 		break;
159 	case 8:
160 		field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
161 		break;
162 	default:
163 		return -1;
164 	}
165 
166 	return 0;
167 }
168 
tp_field__ptr(struct tp_field * field,struct perf_sample * sample)169 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
170 {
171 	return sample->raw_data + field->offset;
172 }
173 
tp_field__init_ptr(struct tp_field * field,struct format_field * format_field)174 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
175 {
176 	field->offset = format_field->offset;
177 	field->pointer = tp_field__ptr;
178 	return 0;
179 }
180 
181 struct syscall_tp {
182 	struct tp_field id;
183 	union {
184 		struct tp_field args, ret;
185 	};
186 };
187 
perf_evsel__init_tp_uint_field(struct perf_evsel * evsel,struct tp_field * field,const char * name)188 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
189 					  struct tp_field *field,
190 					  const char *name)
191 {
192 	struct format_field *format_field = perf_evsel__field(evsel, name);
193 
194 	if (format_field == NULL)
195 		return -1;
196 
197 	return tp_field__init_uint(field, format_field, evsel->needs_swap);
198 }
199 
200 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
201 	({ struct syscall_tp *sc = evsel->priv;\
202 	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
203 
perf_evsel__init_tp_ptr_field(struct perf_evsel * evsel,struct tp_field * field,const char * name)204 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
205 					 struct tp_field *field,
206 					 const char *name)
207 {
208 	struct format_field *format_field = perf_evsel__field(evsel, name);
209 
210 	if (format_field == NULL)
211 		return -1;
212 
213 	return tp_field__init_ptr(field, format_field);
214 }
215 
216 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
217 	({ struct syscall_tp *sc = evsel->priv;\
218 	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
219 
perf_evsel__delete_priv(struct perf_evsel * evsel)220 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
221 {
222 	zfree(&evsel->priv);
223 	perf_evsel__delete(evsel);
224 }
225 
perf_evsel__init_syscall_tp(struct perf_evsel * evsel,void * handler)226 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
227 {
228 	evsel->priv = malloc(sizeof(struct syscall_tp));
229 	if (evsel->priv != NULL) {
230 		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
231 			goto out_delete;
232 
233 		evsel->handler = handler;
234 		return 0;
235 	}
236 
237 	return -ENOMEM;
238 
239 out_delete:
240 	zfree(&evsel->priv);
241 	return -ENOENT;
242 }
243 
perf_evsel__syscall_newtp(const char * direction,void * handler)244 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
245 {
246 	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
247 
248 	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
249 	if (IS_ERR(evsel))
250 		evsel = perf_evsel__newtp("syscalls", direction);
251 
252 	if (IS_ERR(evsel))
253 		return NULL;
254 
255 	if (perf_evsel__init_syscall_tp(evsel, handler))
256 		goto out_delete;
257 
258 	return evsel;
259 
260 out_delete:
261 	perf_evsel__delete_priv(evsel);
262 	return NULL;
263 }
264 
/*
 * Read member @name of the struct syscall_tp in evsel->priv from @sample
 * through its integer reader.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

/* Same, through the member's pointer reader. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
272 
273 struct syscall_arg {
274 	unsigned long val;
275 	struct thread *thread;
276 	struct trace  *trace;
277 	void	      *parm;
278 	u8	      idx;
279 	u8	      mask;
280 };
281 
/* Table that maps a range of integer values to names. */
struct strarray {
	int	    offset;	/* value named by entries[0] */
	int	    nr_entries;	/* number of slots in entries[] */
	const char **entries;
};
287 
/* Define strarray__<array> describing @array, whose first slot names value 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Same, but the first slot of @array names the value @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
298 
__syscall_arg__scnprintf_strarray(char * bf,size_t size,const char * intfmt,struct syscall_arg * arg)299 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
300 						const char *intfmt,
301 					        struct syscall_arg *arg)
302 {
303 	struct strarray *sa = arg->parm;
304 	int idx = arg->val - sa->offset;
305 
306 	if (idx < 0 || idx >= sa->nr_entries)
307 		return scnprintf(bf, size, intfmt, arg->val);
308 
309 	return scnprintf(bf, size, "%s", sa->entries[idx]);
310 }
311 
/* strarray formatter whose numeric fallback is decimal ("%d"). */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
319 
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * 	  gets rewritten to support all arches.
 */

/* strarray formatter whose numeric fallback is hexadecimal ("%#x"). */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
333 
/* Forward declaration: used by the formatters below, defined elsewhere. */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
338 
syscall_arg__scnprintf_fd_at(char * bf,size_t size,struct syscall_arg * arg)339 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
340 					   struct syscall_arg *arg)
341 {
342 	int fd = arg->val;
343 
344 	if (fd == AT_FDCWD)
345 		return scnprintf(bf, size, "CWD");
346 
347 	return syscall_arg__scnprintf_fd(bf, size, arg);
348 }
349 
350 #define SCA_FDAT syscall_arg__scnprintf_fd_at
351 
/* Forward declaration: referenced through SCA_CLOSE_FD, defined elsewhere. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
356 
syscall_arg__scnprintf_hex(char * bf,size_t size,struct syscall_arg * arg)357 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
358 					 struct syscall_arg *arg)
359 {
360 	return scnprintf(bf, size, "%#lx", arg->val);
361 }
362 
363 #define SCA_HEX syscall_arg__scnprintf_hex
364 
syscall_arg__scnprintf_int(char * bf,size_t size,struct syscall_arg * arg)365 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
366 					 struct syscall_arg *arg)
367 {
368 	return scnprintf(bf, size, "%d", arg->val);
369 }
370 
371 #define SCA_INT syscall_arg__scnprintf_int
372 
syscall_arg__scnprintf_mmap_prot(char * bf,size_t size,struct syscall_arg * arg)373 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
374 					       struct syscall_arg *arg)
375 {
376 	int printed = 0, prot = arg->val;
377 
378 	if (prot == PROT_NONE)
379 		return scnprintf(bf, size, "NONE");
380 #define	P_MMAP_PROT(n) \
381 	if (prot & PROT_##n) { \
382 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
383 		prot &= ~PROT_##n; \
384 	}
385 
386 	P_MMAP_PROT(EXEC);
387 	P_MMAP_PROT(READ);
388 	P_MMAP_PROT(WRITE);
389 #ifdef PROT_SEM
390 	P_MMAP_PROT(SEM);
391 #endif
392 	P_MMAP_PROT(GROWSDOWN);
393 	P_MMAP_PROT(GROWSUP);
394 #undef P_MMAP_PROT
395 
396 	if (prot)
397 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
398 
399 	return printed;
400 }
401 
402 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
403 
syscall_arg__scnprintf_mmap_flags(char * bf,size_t size,struct syscall_arg * arg)404 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
405 						struct syscall_arg *arg)
406 {
407 	int printed = 0, flags = arg->val;
408 
409 #define	P_MMAP_FLAG(n) \
410 	if (flags & MAP_##n) { \
411 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
412 		flags &= ~MAP_##n; \
413 	}
414 
415 	P_MMAP_FLAG(SHARED);
416 	P_MMAP_FLAG(PRIVATE);
417 #ifdef MAP_32BIT
418 	P_MMAP_FLAG(32BIT);
419 #endif
420 	P_MMAP_FLAG(ANONYMOUS);
421 	P_MMAP_FLAG(DENYWRITE);
422 	P_MMAP_FLAG(EXECUTABLE);
423 	P_MMAP_FLAG(FILE);
424 	P_MMAP_FLAG(FIXED);
425 	P_MMAP_FLAG(GROWSDOWN);
426 #ifdef MAP_HUGETLB
427 	P_MMAP_FLAG(HUGETLB);
428 #endif
429 	P_MMAP_FLAG(LOCKED);
430 	P_MMAP_FLAG(NONBLOCK);
431 	P_MMAP_FLAG(NORESERVE);
432 	P_MMAP_FLAG(POPULATE);
433 	P_MMAP_FLAG(STACK);
434 #ifdef MAP_UNINITIALIZED
435 	P_MMAP_FLAG(UNINITIALIZED);
436 #endif
437 #undef P_MMAP_FLAG
438 
439 	if (flags)
440 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
441 
442 	return printed;
443 }
444 
445 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
446 
syscall_arg__scnprintf_mremap_flags(char * bf,size_t size,struct syscall_arg * arg)447 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
448 						  struct syscall_arg *arg)
449 {
450 	int printed = 0, flags = arg->val;
451 
452 #define P_MREMAP_FLAG(n) \
453 	if (flags & MREMAP_##n) { \
454 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
455 		flags &= ~MREMAP_##n; \
456 	}
457 
458 	P_MREMAP_FLAG(MAYMOVE);
459 #ifdef MREMAP_FIXED
460 	P_MREMAP_FLAG(FIXED);
461 #endif
462 #undef P_MREMAP_FLAG
463 
464 	if (flags)
465 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
466 
467 	return printed;
468 }
469 
470 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
471 
syscall_arg__scnprintf_madvise_behavior(char * bf,size_t size,struct syscall_arg * arg)472 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
473 						      struct syscall_arg *arg)
474 {
475 	int behavior = arg->val;
476 
477 	switch (behavior) {
478 #define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
479 	P_MADV_BHV(NORMAL);
480 	P_MADV_BHV(RANDOM);
481 	P_MADV_BHV(SEQUENTIAL);
482 	P_MADV_BHV(WILLNEED);
483 	P_MADV_BHV(DONTNEED);
484 	P_MADV_BHV(REMOVE);
485 	P_MADV_BHV(DONTFORK);
486 	P_MADV_BHV(DOFORK);
487 	P_MADV_BHV(HWPOISON);
488 #ifdef MADV_SOFT_OFFLINE
489 	P_MADV_BHV(SOFT_OFFLINE);
490 #endif
491 	P_MADV_BHV(MERGEABLE);
492 	P_MADV_BHV(UNMERGEABLE);
493 #ifdef MADV_HUGEPAGE
494 	P_MADV_BHV(HUGEPAGE);
495 #endif
496 #ifdef MADV_NOHUGEPAGE
497 	P_MADV_BHV(NOHUGEPAGE);
498 #endif
499 #ifdef MADV_DONTDUMP
500 	P_MADV_BHV(DONTDUMP);
501 #endif
502 #ifdef MADV_DODUMP
503 	P_MADV_BHV(DODUMP);
504 #endif
505 #undef P_MADV_PHV
506 	default: break;
507 	}
508 
509 	return scnprintf(bf, size, "%#x", behavior);
510 }
511 
512 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
513 
syscall_arg__scnprintf_flock(char * bf,size_t size,struct syscall_arg * arg)514 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
515 					   struct syscall_arg *arg)
516 {
517 	int printed = 0, op = arg->val;
518 
519 	if (op == 0)
520 		return scnprintf(bf, size, "NONE");
521 #define	P_CMD(cmd) \
522 	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
523 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
524 		op &= ~LOCK_##cmd; \
525 	}
526 
527 	P_CMD(SH);
528 	P_CMD(EX);
529 	P_CMD(NB);
530 	P_CMD(UN);
531 	P_CMD(MAND);
532 	P_CMD(RW);
533 	P_CMD(READ);
534 	P_CMD(WRITE);
535 #undef P_OP
536 
537 	if (op)
538 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
539 
540 	return printed;
541 }
542 
543 #define SCA_FLOCK syscall_arg__scnprintf_flock
544 
syscall_arg__scnprintf_futex_op(char * bf,size_t size,struct syscall_arg * arg)545 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
546 {
547 	enum syscall_futex_args {
548 		SCF_UADDR   = (1 << 0),
549 		SCF_OP	    = (1 << 1),
550 		SCF_VAL	    = (1 << 2),
551 		SCF_TIMEOUT = (1 << 3),
552 		SCF_UADDR2  = (1 << 4),
553 		SCF_VAL3    = (1 << 5),
554 	};
555 	int op = arg->val;
556 	int cmd = op & FUTEX_CMD_MASK;
557 	size_t printed = 0;
558 
559 	switch (cmd) {
560 #define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
561 	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
562 	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
563 	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
564 	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
565 	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
566 	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
567 	P_FUTEX_OP(WAKE_OP);							  break;
568 	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
569 	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
570 	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
571 	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
572 	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
573 	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
574 	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
575 	}
576 
577 	if (op & FUTEX_PRIVATE_FLAG)
578 		printed += scnprintf(bf + printed, size - printed, "|PRIV");
579 
580 	if (op & FUTEX_CLOCK_REALTIME)
581 		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
582 
583 	return printed;
584 }
585 
586 #define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
587 
588 static const char *bpf_cmd[] = {
589 	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
590 	"MAP_GET_NEXT_KEY", "PROG_LOAD",
591 };
592 static DEFINE_STRARRAY(bpf_cmd);
593 
594 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
595 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
596 
597 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
598 static DEFINE_STRARRAY(itimers);
599 
600 static const char *keyctl_options[] = {
601 	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
602 	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
603 	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
604 	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
605 	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
606 };
607 static DEFINE_STRARRAY(keyctl_options);
608 
609 static const char *whences[] = { "SET", "CUR", "END",
610 #ifdef SEEK_DATA
611 "DATA",
612 #endif
613 #ifdef SEEK_HOLE
614 "HOLE",
615 #endif
616 };
617 static DEFINE_STRARRAY(whences);
618 
619 static const char *fcntl_cmds[] = {
620 	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
621 	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
622 	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
623 	"F_GETOWNER_UIDS",
624 };
625 static DEFINE_STRARRAY(fcntl_cmds);
626 
627 static const char *rlimit_resources[] = {
628 	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
629 	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
630 	"RTTIME",
631 };
632 static DEFINE_STRARRAY(rlimit_resources);
633 
634 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
635 static DEFINE_STRARRAY(sighow);
636 
637 static const char *clockid[] = {
638 	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
639 	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
640 	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
641 };
642 static DEFINE_STRARRAY(clockid);
643 
/*
 * Socket address family names, indexed by the AF_* value.
 *
 * "MPLS" (AF_MPLS, 28) has to be present: without it AF_CAN (29) and every
 * family after it is reported under the name of the next one.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
653 
654 #ifndef SOCK_TYPE_MASK
655 #define SOCK_TYPE_MASK 0xf
656 #endif
657 
syscall_arg__scnprintf_socket_type(char * bf,size_t size,struct syscall_arg * arg)658 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
659 						      struct syscall_arg *arg)
660 {
661 	size_t printed;
662 	int type = arg->val,
663 	    flags = type & ~SOCK_TYPE_MASK;
664 
665 	type &= SOCK_TYPE_MASK;
666 	/*
667  	 * Can't use a strarray, MIPS may override for ABI reasons.
668  	 */
669 	switch (type) {
670 #define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
671 	P_SK_TYPE(STREAM);
672 	P_SK_TYPE(DGRAM);
673 	P_SK_TYPE(RAW);
674 	P_SK_TYPE(RDM);
675 	P_SK_TYPE(SEQPACKET);
676 	P_SK_TYPE(DCCP);
677 	P_SK_TYPE(PACKET);
678 #undef P_SK_TYPE
679 	default:
680 		printed = scnprintf(bf, size, "%#x", type);
681 	}
682 
683 #define	P_SK_FLAG(n) \
684 	if (flags & SOCK_##n) { \
685 		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
686 		flags &= ~SOCK_##n; \
687 	}
688 
689 	P_SK_FLAG(CLOEXEC);
690 	P_SK_FLAG(NONBLOCK);
691 #undef P_SK_FLAG
692 
693 	if (flags)
694 		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
695 
696 	return printed;
697 }
698 
699 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
700 
701 #ifndef MSG_PROBE
702 #define MSG_PROBE	     0x10
703 #endif
704 #ifndef MSG_WAITFORONE
705 #define MSG_WAITFORONE	0x10000
706 #endif
707 #ifndef MSG_SENDPAGE_NOTLAST
708 #define MSG_SENDPAGE_NOTLAST 0x20000
709 #endif
710 #ifndef MSG_FASTOPEN
711 #define MSG_FASTOPEN	     0x20000000
712 #endif
713 
syscall_arg__scnprintf_msg_flags(char * bf,size_t size,struct syscall_arg * arg)714 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
715 					       struct syscall_arg *arg)
716 {
717 	int printed = 0, flags = arg->val;
718 
719 	if (flags == 0)
720 		return scnprintf(bf, size, "NONE");
721 #define	P_MSG_FLAG(n) \
722 	if (flags & MSG_##n) { \
723 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
724 		flags &= ~MSG_##n; \
725 	}
726 
727 	P_MSG_FLAG(OOB);
728 	P_MSG_FLAG(PEEK);
729 	P_MSG_FLAG(DONTROUTE);
730 	P_MSG_FLAG(TRYHARD);
731 	P_MSG_FLAG(CTRUNC);
732 	P_MSG_FLAG(PROBE);
733 	P_MSG_FLAG(TRUNC);
734 	P_MSG_FLAG(DONTWAIT);
735 	P_MSG_FLAG(EOR);
736 	P_MSG_FLAG(WAITALL);
737 	P_MSG_FLAG(FIN);
738 	P_MSG_FLAG(SYN);
739 	P_MSG_FLAG(CONFIRM);
740 	P_MSG_FLAG(RST);
741 	P_MSG_FLAG(ERRQUEUE);
742 	P_MSG_FLAG(NOSIGNAL);
743 	P_MSG_FLAG(MORE);
744 	P_MSG_FLAG(WAITFORONE);
745 	P_MSG_FLAG(SENDPAGE_NOTLAST);
746 	P_MSG_FLAG(FASTOPEN);
747 	P_MSG_FLAG(CMSG_CLOEXEC);
748 #undef P_MSG_FLAG
749 
750 	if (flags)
751 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
752 
753 	return printed;
754 }
755 
756 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
757 
syscall_arg__scnprintf_access_mode(char * bf,size_t size,struct syscall_arg * arg)758 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
759 						 struct syscall_arg *arg)
760 {
761 	size_t printed = 0;
762 	int mode = arg->val;
763 
764 	if (mode == F_OK) /* 0 */
765 		return scnprintf(bf, size, "F");
766 #define	P_MODE(n) \
767 	if (mode & n##_OK) { \
768 		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
769 		mode &= ~n##_OK; \
770 	}
771 
772 	P_MODE(R);
773 	P_MODE(W);
774 	P_MODE(X);
775 #undef P_MODE
776 
777 	if (mode)
778 		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
779 
780 	return printed;
781 }
782 
783 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
784 
/* Forward declaration: referenced through SCA_FILENAME, defined elsewhere. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
789 
syscall_arg__scnprintf_open_flags(char * bf,size_t size,struct syscall_arg * arg)790 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
791 					       struct syscall_arg *arg)
792 {
793 	int printed = 0, flags = arg->val;
794 
795 	if (!(flags & O_CREAT))
796 		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
797 
798 	if (flags == 0)
799 		return scnprintf(bf, size, "RDONLY");
800 #define	P_FLAG(n) \
801 	if (flags & O_##n) { \
802 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
803 		flags &= ~O_##n; \
804 	}
805 
806 	P_FLAG(APPEND);
807 	P_FLAG(ASYNC);
808 	P_FLAG(CLOEXEC);
809 	P_FLAG(CREAT);
810 	P_FLAG(DIRECT);
811 	P_FLAG(DIRECTORY);
812 	P_FLAG(EXCL);
813 	P_FLAG(LARGEFILE);
814 	P_FLAG(NOATIME);
815 	P_FLAG(NOCTTY);
816 #ifdef O_NONBLOCK
817 	P_FLAG(NONBLOCK);
818 #elif O_NDELAY
819 	P_FLAG(NDELAY);
820 #endif
821 #ifdef O_PATH
822 	P_FLAG(PATH);
823 #endif
824 	P_FLAG(RDWR);
825 #ifdef O_DSYNC
826 	if ((flags & O_SYNC) == O_SYNC)
827 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
828 	else {
829 		P_FLAG(DSYNC);
830 	}
831 #else
832 	P_FLAG(SYNC);
833 #endif
834 	P_FLAG(TRUNC);
835 	P_FLAG(WRONLY);
836 #undef P_FLAG
837 
838 	if (flags)
839 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
840 
841 	return printed;
842 }
843 
844 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
845 
syscall_arg__scnprintf_perf_flags(char * bf,size_t size,struct syscall_arg * arg)846 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
847 						struct syscall_arg *arg)
848 {
849 	int printed = 0, flags = arg->val;
850 
851 	if (flags == 0)
852 		return 0;
853 
854 #define	P_FLAG(n) \
855 	if (flags & PERF_FLAG_##n) { \
856 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
857 		flags &= ~PERF_FLAG_##n; \
858 	}
859 
860 	P_FLAG(FD_NO_GROUP);
861 	P_FLAG(FD_OUTPUT);
862 	P_FLAG(PID_CGROUP);
863 	P_FLAG(FD_CLOEXEC);
864 #undef P_FLAG
865 
866 	if (flags)
867 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
868 
869 	return printed;
870 }
871 
872 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
873 
syscall_arg__scnprintf_eventfd_flags(char * bf,size_t size,struct syscall_arg * arg)874 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
875 						   struct syscall_arg *arg)
876 {
877 	int printed = 0, flags = arg->val;
878 
879 	if (flags == 0)
880 		return scnprintf(bf, size, "NONE");
881 #define	P_FLAG(n) \
882 	if (flags & EFD_##n) { \
883 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
884 		flags &= ~EFD_##n; \
885 	}
886 
887 	P_FLAG(SEMAPHORE);
888 	P_FLAG(CLOEXEC);
889 	P_FLAG(NONBLOCK);
890 #undef P_FLAG
891 
892 	if (flags)
893 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
894 
895 	return printed;
896 }
897 
898 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
899 
syscall_arg__scnprintf_pipe_flags(char * bf,size_t size,struct syscall_arg * arg)900 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
901 						struct syscall_arg *arg)
902 {
903 	int printed = 0, flags = arg->val;
904 
905 #define	P_FLAG(n) \
906 	if (flags & O_##n) { \
907 		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
908 		flags &= ~O_##n; \
909 	}
910 
911 	P_FLAG(CLOEXEC);
912 	P_FLAG(NONBLOCK);
913 #undef P_FLAG
914 
915 	if (flags)
916 		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
917 
918 	return printed;
919 }
920 
921 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
922 
syscall_arg__scnprintf_signum(char * bf,size_t size,struct syscall_arg * arg)923 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
924 {
925 	int sig = arg->val;
926 
927 	switch (sig) {
928 #define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
929 	P_SIGNUM(HUP);
930 	P_SIGNUM(INT);
931 	P_SIGNUM(QUIT);
932 	P_SIGNUM(ILL);
933 	P_SIGNUM(TRAP);
934 	P_SIGNUM(ABRT);
935 	P_SIGNUM(BUS);
936 	P_SIGNUM(FPE);
937 	P_SIGNUM(KILL);
938 	P_SIGNUM(USR1);
939 	P_SIGNUM(SEGV);
940 	P_SIGNUM(USR2);
941 	P_SIGNUM(PIPE);
942 	P_SIGNUM(ALRM);
943 	P_SIGNUM(TERM);
944 	P_SIGNUM(CHLD);
945 	P_SIGNUM(CONT);
946 	P_SIGNUM(STOP);
947 	P_SIGNUM(TSTP);
948 	P_SIGNUM(TTIN);
949 	P_SIGNUM(TTOU);
950 	P_SIGNUM(URG);
951 	P_SIGNUM(XCPU);
952 	P_SIGNUM(XFSZ);
953 	P_SIGNUM(VTALRM);
954 	P_SIGNUM(PROF);
955 	P_SIGNUM(WINCH);
956 	P_SIGNUM(IO);
957 	P_SIGNUM(PWR);
958 	P_SIGNUM(SYS);
959 #ifdef SIGEMT
960 	P_SIGNUM(EMT);
961 #endif
962 #ifdef SIGSTKFLT
963 	P_SIGNUM(STKFLT);
964 #endif
965 #ifdef SIGSWI
966 	P_SIGNUM(SWI);
967 #endif
968 	default: break;
969 	}
970 
971 	return scnprintf(bf, size, "%#x", sig);
972 }
973 
974 #define SCA_SIGNUM syscall_arg__scnprintf_signum
975 
976 #if defined(__i386__) || defined(__x86_64__)
977 /*
978  * FIXME: Make this available to all arches.
979  */
980 #define TCGETS		0x5401
981 
982 static const char *tioctls[] = {
983 	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
984 	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
985 	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
986 	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
987 	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
988 	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
989 	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
990 	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
991 	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
992 	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
993 	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
994 	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
995 	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
996 	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
997 	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
998 };
999 
1000 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
1001 #endif /* defined(__i386__) || defined(__x86_64__) */
1002 
/*
 * Initializer fragment for a struct syscall_fmt entry: argument number @arg
 * is formatted by SCA_STRARRAY with strarray__<array> as its table.
 * @name is not used by the expansion.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
1006 
1007 static struct syscall_fmt {
1008 	const char *name;
1009 	const char *alias;
1010 	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1011 	void	   *arg_parm[6];
1012 	bool	   errmsg;
1013 	bool	   timeout;
1014 	bool	   hexret;
1015 } syscall_fmts[] = {
1016 	{ .name	    = "access",	    .errmsg = true,
1017 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1018 			     [1] = SCA_ACCMODE,  /* mode */ }, },
1019 	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
1020 	{ .name	    = "bpf",	    .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
1021 	{ .name	    = "brk",	    .hexret = true,
1022 	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1023 	{ .name	    = "chdir",	    .errmsg = true,
1024 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1025 	{ .name	    = "chmod",	    .errmsg = true,
1026 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1027 	{ .name	    = "chroot",	    .errmsg = true,
1028 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1029 	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
1030 	{ .name	    = "close",	    .errmsg = true,
1031 	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1032 	{ .name	    = "connect",    .errmsg = true, },
1033 	{ .name	    = "creat",	    .errmsg = true,
1034 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1035 	{ .name	    = "dup",	    .errmsg = true,
1036 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1037 	{ .name	    = "dup2",	    .errmsg = true,
1038 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1039 	{ .name	    = "dup3",	    .errmsg = true,
1040 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1041 	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1042 	{ .name	    = "eventfd2",   .errmsg = true,
1043 	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1044 	{ .name	    = "faccessat",  .errmsg = true,
1045 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1046 			     [1] = SCA_FILENAME, /* filename */ }, },
1047 	{ .name	    = "fadvise64",  .errmsg = true,
1048 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1049 	{ .name	    = "fallocate",  .errmsg = true,
1050 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1051 	{ .name	    = "fchdir",	    .errmsg = true,
1052 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1053 	{ .name	    = "fchmod",	    .errmsg = true,
1054 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1055 	{ .name	    = "fchmodat",   .errmsg = true,
1056 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1057 			     [1] = SCA_FILENAME, /* filename */ }, },
1058 	{ .name	    = "fchown",	    .errmsg = true,
1059 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1060 	{ .name	    = "fchownat",   .errmsg = true,
1061 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1062 			     [1] = SCA_FILENAME, /* filename */ }, },
1063 	{ .name	    = "fcntl",	    .errmsg = true,
1064 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1065 			     [1] = SCA_STRARRAY, /* cmd */ },
1066 	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1067 	{ .name	    = "fdatasync",  .errmsg = true,
1068 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1069 	{ .name	    = "flock",	    .errmsg = true,
1070 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1071 			     [1] = SCA_FLOCK, /* cmd */ }, },
1072 	{ .name	    = "fsetxattr",  .errmsg = true,
1073 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1074 	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat",
1075 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1076 	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat",
1077 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1078 			     [1] = SCA_FILENAME, /* filename */ }, },
1079 	{ .name	    = "fstatfs",    .errmsg = true,
1080 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1081 	{ .name	    = "fsync",    .errmsg = true,
1082 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1083 	{ .name	    = "ftruncate", .errmsg = true,
1084 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1085 	{ .name	    = "futex",	    .errmsg = true,
1086 	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1087 	{ .name	    = "futimesat", .errmsg = true,
1088 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1089 			     [1] = SCA_FILENAME, /* filename */ }, },
1090 	{ .name	    = "getdents",   .errmsg = true,
1091 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1092 	{ .name	    = "getdents64", .errmsg = true,
1093 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1094 	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1095 	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1096 	{ .name	    = "getxattr",    .errmsg = true,
1097 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1098 	{ .name	    = "inotify_add_watch",	    .errmsg = true,
1099 	  .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1100 	{ .name	    = "ioctl",	    .errmsg = true,
1101 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1102 #if defined(__i386__) || defined(__x86_64__)
1103 /*
1104  * FIXME: Make this available to all arches.
1105  */
1106 			     [1] = SCA_STRHEXARRAY, /* cmd */
1107 			     [2] = SCA_HEX, /* arg */ },
1108 	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
1109 #else
1110 			     [2] = SCA_HEX, /* arg */ }, },
1111 #endif
1112 	{ .name	    = "keyctl",	    .errmsg = true, STRARRAY(0, option, keyctl_options), },
1113 	{ .name	    = "kill",	    .errmsg = true,
1114 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1115 	{ .name	    = "lchown",    .errmsg = true,
1116 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1117 	{ .name	    = "lgetxattr",  .errmsg = true,
1118 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1119 	{ .name	    = "linkat",	    .errmsg = true,
1120 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1121 	{ .name	    = "listxattr",  .errmsg = true,
1122 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1123 	{ .name	    = "llistxattr", .errmsg = true,
1124 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1125 	{ .name	    = "lremovexattr",  .errmsg = true,
1126 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1127 	{ .name	    = "lseek",	    .errmsg = true,
1128 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1129 			     [2] = SCA_STRARRAY, /* whence */ },
1130 	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
1131 	{ .name	    = "lsetxattr",  .errmsg = true,
1132 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1133 	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat",
1134 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1135 	{ .name	    = "lsxattr",    .errmsg = true,
1136 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1137 	{ .name     = "madvise",    .errmsg = true,
1138 	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
1139 			     [2] = SCA_MADV_BHV, /* behavior */ }, },
1140 	{ .name	    = "mkdir",    .errmsg = true,
1141 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1142 	{ .name	    = "mkdirat",    .errmsg = true,
1143 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1144 			     [1] = SCA_FILENAME, /* pathname */ }, },
1145 	{ .name	    = "mknod",      .errmsg = true,
1146 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1147 	{ .name	    = "mknodat",    .errmsg = true,
1148 	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1149 			     [1] = SCA_FILENAME, /* filename */ }, },
1150 	{ .name	    = "mlock",	    .errmsg = true,
1151 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1152 	{ .name	    = "mlockall",   .errmsg = true,
1153 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1154 	{ .name	    = "mmap",	    .hexret = true,
1155 /* The standard mmap maps to old_mmap on s390x */
1156 #if defined(__s390x__)
1157 	.alias = "old_mmap",
1158 #endif
1159 	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
1160 			     [2] = SCA_MMAP_PROT, /* prot */
1161 			     [3] = SCA_MMAP_FLAGS, /* flags */
1162 			     [4] = SCA_FD, 	  /* fd */ }, },
1163 	{ .name	    = "mprotect",   .errmsg = true,
1164 	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
1165 			     [2] = SCA_MMAP_PROT, /* prot */ }, },
1166 	{ .name	    = "mq_unlink", .errmsg = true,
1167 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1168 	{ .name	    = "mremap",	    .hexret = true,
1169 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1170 			     [3] = SCA_MREMAP_FLAGS, /* flags */
1171 			     [4] = SCA_HEX, /* new_addr */ }, },
1172 	{ .name	    = "munlock",    .errmsg = true,
1173 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1174 	{ .name	    = "munmap",	    .errmsg = true,
1175 	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1176 	{ .name	    = "name_to_handle_at", .errmsg = true,
1177 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1178 	{ .name	    = "newfstatat", .errmsg = true,
1179 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1180 			     [1] = SCA_FILENAME, /* filename */ }, },
1181 	{ .name	    = "open",	    .errmsg = true,
1182 	  .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
1183 			     [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1184 	{ .name	    = "open_by_handle_at", .errmsg = true,
1185 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1186 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1187 	{ .name	    = "openat",	    .errmsg = true,
1188 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1189 			     [1] = SCA_FILENAME, /* filename */
1190 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1191 	{ .name	    = "perf_event_open", .errmsg = true,
1192 	  .arg_scnprintf = { [1] = SCA_INT, /* pid */
1193 			     [2] = SCA_INT, /* cpu */
1194 			     [3] = SCA_FD,  /* group_fd */
1195 			     [4] = SCA_PERF_FLAGS,  /* flags */ }, },
1196 	{ .name	    = "pipe2",	    .errmsg = true,
1197 	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1198 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
1199 	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
1200 	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64",
1201 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1202 	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread",
1203 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1204 	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1205 	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64",
1206 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1207 	{ .name	    = "pwritev",    .errmsg = true,
1208 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1209 	{ .name	    = "read",	    .errmsg = true,
1210 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1211 	{ .name	    = "readlink",   .errmsg = true,
1212 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1213 	{ .name	    = "readlinkat", .errmsg = true,
1214 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1215 			     [1] = SCA_FILENAME, /* pathname */ }, },
1216 	{ .name	    = "readv",	    .errmsg = true,
1217 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1218 	{ .name	    = "recvfrom",   .errmsg = true,
1219 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1220 			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1221 	{ .name	    = "recvmmsg",   .errmsg = true,
1222 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1223 			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1224 	{ .name	    = "recvmsg",    .errmsg = true,
1225 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1226 			     [2] = SCA_MSG_FLAGS, /* flags */ }, },
1227 	{ .name	    = "removexattr", .errmsg = true,
1228 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1229 	{ .name	    = "renameat",   .errmsg = true,
1230 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1231 	{ .name	    = "rmdir",    .errmsg = true,
1232 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1233 	{ .name	    = "rt_sigaction", .errmsg = true,
1234 	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1235 	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
1236 	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
1237 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1238 	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
1239 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1240 	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
1241 	{ .name	    = "sendmmsg",    .errmsg = true,
1242 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1243 			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1244 	{ .name	    = "sendmsg",    .errmsg = true,
1245 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1246 			     [2] = SCA_MSG_FLAGS, /* flags */ }, },
1247 	{ .name	    = "sendto",	    .errmsg = true,
1248 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
1249 			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
1250 	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
1251 	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1252 	{ .name	    = "setxattr",   .errmsg = true,
1253 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1254 	{ .name	    = "shutdown",   .errmsg = true,
1255 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1256 	{ .name	    = "socket",	    .errmsg = true,
1257 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1258 			     [1] = SCA_SK_TYPE, /* type */ },
1259 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1260 	{ .name	    = "socketpair", .errmsg = true,
1261 	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1262 			     [1] = SCA_SK_TYPE, /* type */ },
1263 	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
1264 	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat",
1265 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1266 	{ .name	    = "statfs",	    .errmsg = true,
1267 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1268 	{ .name	    = "swapoff",    .errmsg = true,
1269 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1270 	{ .name	    = "swapon",	    .errmsg = true,
1271 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1272 	{ .name	    = "symlinkat",  .errmsg = true,
1273 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1274 	{ .name	    = "tgkill",	    .errmsg = true,
1275 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1276 	{ .name	    = "tkill",	    .errmsg = true,
1277 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1278 	{ .name	    = "truncate",   .errmsg = true,
1279 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1280 	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
1281 	{ .name	    = "unlinkat",   .errmsg = true,
1282 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1283 			     [1] = SCA_FILENAME, /* pathname */ }, },
1284 	{ .name	    = "utime",  .errmsg = true,
1285 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1286 	{ .name	    = "utimensat",  .errmsg = true,
1287 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1288 			     [1] = SCA_FILENAME, /* filename */ }, },
1289 	{ .name	    = "utimes",  .errmsg = true,
1290 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1291 	{ .name	    = "vmsplice",  .errmsg = true,
1292 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1293 	{ .name	    = "write",	    .errmsg = true,
1294 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1295 	{ .name	    = "writev",	    .errmsg = true,
1296 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1297 };
1298 
syscall_fmt__cmp(const void * name,const void * fmtp)1299 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1300 {
1301 	const struct syscall_fmt *fmt = fmtp;
1302 	return strcmp(name, fmt->name);
1303 }
1304 
syscall_fmt__find(const char * name)1305 static struct syscall_fmt *syscall_fmt__find(const char *name)
1306 {
1307 	const int nmemb = ARRAY_SIZE(syscall_fmts);
1308 	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1309 }
1310 
/*
 * Cached description of one syscall, stored in trace->syscalls.table[] at the
 * index given by the syscall id and filled in by trace__read_syscall_info().
 */
struct syscall {
	/* format of the "syscalls:sys_enter_<name>" (or alias) tracepoint */
	struct event_format *tp_format;
	/* number of entries in @args (the leading "nr" field is not counted) */
	int		    nr_args;
	/* linked list of the tracepoint fields describing the arguments */
	struct format_field *args;
	/* name as returned by audit_syscall_to_name() */
	const char	    *name;
	/* true for "exit" and "exit_group" */
	bool		    is_exit;
	/* matching syscall_fmts[] entry, NULL if there is none */
	struct syscall_fmt  *fmt;
	/* per-argument formatters, nr_args slots; NULL slot means no formatter */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* per-argument formatter parameters; points at fmt->arg_parm when fmt is set */
	void		    **arg_parm;
};
1321 
fprintf_duration(unsigned long t,FILE * fp)1322 static size_t fprintf_duration(unsigned long t, FILE *fp)
1323 {
1324 	double duration = (double)t / NSEC_PER_MSEC;
1325 	size_t printed = fprintf(fp, "(");
1326 
1327 	if (duration >= 1.0)
1328 		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1329 	else if (duration >= 0.01)
1330 		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1331 	else
1332 		printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1333 	return printed + fprintf(fp, "): ");
1334 }
1335 
/**
 * Per-thread tracing state, attached to a struct thread through
 * thread__set_priv() and created on demand by thread__trace().
 *
 * nr_events: incremented every time thread__trace() is called for the thread.
 * entry_str: buffer holding the formatted syscall entry line;
 *            filename.entry_str_pos is an offset into it.
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 * paths.max: highest fd that paths.table has a slot for, -1 while the table
 *            is empty.
 * paths.table: fd -> strdup()ed pathname, NULL for fds with no known path.
 * syscall_stats: intlist allocated in thread_trace__new().
 */
struct thread_trace {
	u64		  entry_time;
	u64		  exit_time;
	bool		  entry_pending;
	unsigned long	  nr_events;
	unsigned long	  pfmaj, pfmin;
	char		  *entry_str;
	double		  runtime_ms;
        struct {
		unsigned long ptr;
		short int     entry_str_pos;
		bool	      pending_open;
		unsigned int  namelen;
		char	      *name;
	} filename;
	struct {
		int	  max;
		char	  **table;
	} paths;

	struct intlist *syscall_stats;
};
1363 
thread_trace__new(void)1364 static struct thread_trace *thread_trace__new(void)
1365 {
1366 	struct thread_trace *ttrace =  zalloc(sizeof(struct thread_trace));
1367 
1368 	if (ttrace)
1369 		ttrace->paths.max = -1;
1370 
1371 	ttrace->syscall_stats = intlist__new(NULL);
1372 
1373 	return ttrace;
1374 }
1375 
thread__trace(struct thread * thread,FILE * fp)1376 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1377 {
1378 	struct thread_trace *ttrace;
1379 
1380 	if (thread == NULL)
1381 		goto fail;
1382 
1383 	if (thread__priv(thread) == NULL)
1384 		thread__set_priv(thread, thread_trace__new());
1385 
1386 	if (thread__priv(thread) == NULL)
1387 		goto fail;
1388 
1389 	ttrace = thread__priv(thread);
1390 	++ttrace->nr_events;
1391 
1392 	return ttrace;
1393 fail:
1394 	color_fprintf(fp, PERF_COLOR_RED,
1395 		      "WARNING: not enough memory, dropping samples!\n");
1396 	return NULL;
1397 }
1398 
/*
 * Single-bit flags.
 * NOTE(review): presumably OR-ed into trace->trace_pgfaults to select major
 * and/or minor page fault tracing -- confirm at the users.
 */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* NOTE(review): presumably the size of thread_trace->entry_str -- confirm at the allocation site. */
static const size_t trace__entry_str_size = 2048;
1403 
/*
 * State of one 'perf trace' session.
 *
 * tool:             embedded perf_tool; trace__tool_process() recovers the
 *                   enclosing struct trace from it with container_of().
 * audit.machine:    machine type handed to audit_syscall_to_name() and
 *                   audit_name_to_syscall().
 * syscalls.max:     highest syscall id syscalls.table has a slot for, -1
 *                   while the table is empty.
 * syscalls.table:   per-id struct syscall cache, grown on demand by
 *                   trace__read_syscall_info().
 * host:             machine created by trace__symbols_init().
 * base_time:        subtracted from sample timestamps before printing.
 * output:           stream all messages and trace lines are written to.
 * ev_qualifier:     list of syscall names; trace__validate_ev_qualifier()
 *                   translates it into ev_qualifier_ids.
 * duration_filter:  threshold in milliseconds used by
 *                   trace__filter_duration().
 * stats.proc_getname: number of fd -> path lookups that went to /proc.
 * live:             when false, fd paths are never looked up in /proc.
 * multiple_threads, show_comm: control the "comm/tid" prefix printed by
 *                   trace__fprintf_entry_head().
 * vfs_getname:      when true, filename arguments are left to be filled in
 *                   later instead of being printed as a pointer.
 */
struct trace {
	struct perf_tool	tool;
	struct {
		int		machine;
		int		open_id;
	}			audit;
	struct {
		int		max;
		struct syscall  *table;
		struct {
			struct perf_evsel *sys_enter,
					  *sys_exit;
		}		events;
	} syscalls;
	struct record_opts	opts;
	struct perf_evlist	*evlist;
	struct machine		*host;
	struct thread		*current;
	u64			base_time;
	FILE			*output;
	unsigned long		nr_events;
	struct strlist		*ev_qualifier;
	struct {
		size_t		nr;
		int		*entries;
	}			ev_qualifier_ids;
	struct intlist		*tid_list;
	struct intlist		*pid_list;
	struct {
		size_t		nr;
		pid_t		*entries;
	}			filter_pids;
	double			duration_filter;
	double			runtime_ms;
	struct {
		u64		vfs_getname,
				proc_getname;
	} stats;
	bool			not_ev_qualifier;
	bool			live;
	bool			full_time;
	bool			sched;
	bool			multiple_threads;
	bool			summary;
	bool			summary_only;
	bool			show_comm;
	bool			show_tool_stats;
	bool			trace_syscalls;
	bool			force;
	bool			vfs_getname;
	int			trace_pgfaults;
};
1456 
trace__set_fd_pathname(struct thread * thread,int fd,const char * pathname)1457 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1458 {
1459 	struct thread_trace *ttrace = thread__priv(thread);
1460 
1461 	if (fd > ttrace->paths.max) {
1462 		char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1463 
1464 		if (npath == NULL)
1465 			return -1;
1466 
1467 		if (ttrace->paths.max != -1) {
1468 			memset(npath + ttrace->paths.max + 1, 0,
1469 			       (fd - ttrace->paths.max) * sizeof(char *));
1470 		} else {
1471 			memset(npath, 0, (fd + 1) * sizeof(char *));
1472 		}
1473 
1474 		ttrace->paths.table = npath;
1475 		ttrace->paths.max   = fd;
1476 	}
1477 
1478 	ttrace->paths.table[fd] = strdup(pathname);
1479 
1480 	return ttrace->paths.table[fd] != NULL ? 0 : -1;
1481 }
1482 
thread__read_fd_path(struct thread * thread,int fd)1483 static int thread__read_fd_path(struct thread *thread, int fd)
1484 {
1485 	char linkname[PATH_MAX], pathname[PATH_MAX];
1486 	struct stat st;
1487 	int ret;
1488 
1489 	if (thread->pid_ == thread->tid) {
1490 		scnprintf(linkname, sizeof(linkname),
1491 			  "/proc/%d/fd/%d", thread->pid_, fd);
1492 	} else {
1493 		scnprintf(linkname, sizeof(linkname),
1494 			  "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1495 	}
1496 
1497 	if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1498 		return -1;
1499 
1500 	ret = readlink(linkname, pathname, sizeof(pathname));
1501 
1502 	if (ret < 0 || ret > st.st_size)
1503 		return -1;
1504 
1505 	pathname[ret] = '\0';
1506 	return trace__set_fd_pathname(thread, fd, pathname);
1507 }
1508 
thread__fd_path(struct thread * thread,int fd,struct trace * trace)1509 static const char *thread__fd_path(struct thread *thread, int fd,
1510 				   struct trace *trace)
1511 {
1512 	struct thread_trace *ttrace = thread__priv(thread);
1513 
1514 	if (ttrace == NULL)
1515 		return NULL;
1516 
1517 	if (fd < 0)
1518 		return NULL;
1519 
1520 	if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1521 		if (!trace->live)
1522 			return NULL;
1523 		++trace->stats.proc_getname;
1524 		if (thread__read_fd_path(thread, fd))
1525 			return NULL;
1526 	}
1527 
1528 	return ttrace->paths.table[fd];
1529 }
1530 
syscall_arg__scnprintf_fd(char * bf,size_t size,struct syscall_arg * arg)1531 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1532 					struct syscall_arg *arg)
1533 {
1534 	int fd = arg->val;
1535 	size_t printed = scnprintf(bf, size, "%d", fd);
1536 	const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1537 
1538 	if (path)
1539 		printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1540 
1541 	return printed;
1542 }
1543 
syscall_arg__scnprintf_close_fd(char * bf,size_t size,struct syscall_arg * arg)1544 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1545 					      struct syscall_arg *arg)
1546 {
1547 	int fd = arg->val;
1548 	size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1549 	struct thread_trace *ttrace = thread__priv(arg->thread);
1550 
1551 	if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1552 		zfree(&ttrace->paths.table[fd]);
1553 
1554 	return printed;
1555 }
1556 
thread__set_filename_pos(struct thread * thread,const char * bf,unsigned long ptr)1557 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1558 				     unsigned long ptr)
1559 {
1560 	struct thread_trace *ttrace = thread__priv(thread);
1561 
1562 	ttrace->filename.ptr = ptr;
1563 	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1564 }
1565 
syscall_arg__scnprintf_filename(char * bf,size_t size,struct syscall_arg * arg)1566 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1567 					      struct syscall_arg *arg)
1568 {
1569 	unsigned long ptr = arg->val;
1570 
1571 	if (!arg->trace->vfs_getname)
1572 		return scnprintf(bf, size, "%#x", ptr);
1573 
1574 	thread__set_filename_pos(arg->thread, bf, ptr);
1575 	return 0;
1576 }
1577 
trace__filter_duration(struct trace * trace,double t)1578 static bool trace__filter_duration(struct trace *trace, double t)
1579 {
1580 	return t < (trace->duration_filter * NSEC_PER_MSEC);
1581 }
1582 
/*
 * Print @tstamp relative to trace->base_time, converted from nanoseconds to
 * milliseconds, followed by a space. Returns what fprintf() returned.
 */
static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
{
	const u64 delta = tstamp - trace->base_time;

	return fprintf(fp, "%10.3f ", (double)delta / NSEC_PER_MSEC);
}
1589 
/*
 * Flags set from the signal handler and polled by the rest of the tool.
 * They are volatile sig_atomic_t because that is the only object type the C
 * standard allows an asynchronous signal handler to write safely; a plain
 * bool may be cached or written non-atomically.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination and remember whether the request came
 * from SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1598 
/*
 * Print the common prefix of a trace line: relative timestamp, duration and,
 * when more than one thread is being traced, "[comm/]tid ".
 *
 * Returns the sum of the values returned by the underlying print calls.
 */
static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
					u64 duration, u64 tstamp, FILE *fp)
{
	size_t printed;

	printed  = trace__fprintf_tstamp(trace, tstamp, fp);
	printed += fprintf_duration(duration, fp);

	if (!trace->multiple_threads)
		return printed;

	if (trace->show_comm)
		printed += fprintf(fp, "%.14s/", thread__comm_str(thread));

	return printed + fprintf(fp, "%d ", thread->tid);
}
1613 
trace__process_event(struct trace * trace,struct machine * machine,union perf_event * event,struct perf_sample * sample)1614 static int trace__process_event(struct trace *trace, struct machine *machine,
1615 				union perf_event *event, struct perf_sample *sample)
1616 {
1617 	int ret = 0;
1618 
1619 	switch (event->header.type) {
1620 	case PERF_RECORD_LOST:
1621 		color_fprintf(trace->output, PERF_COLOR_RED,
1622 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
1623 		ret = machine__process_lost_event(machine, event, sample);
1624 		break;
1625 	default:
1626 		ret = machine__process_event(machine, event, sample);
1627 		break;
1628 	}
1629 
1630 	return ret;
1631 }
1632 
trace__tool_process(struct perf_tool * tool,union perf_event * event,struct perf_sample * sample,struct machine * machine)1633 static int trace__tool_process(struct perf_tool *tool,
1634 			       union perf_event *event,
1635 			       struct perf_sample *sample,
1636 			       struct machine *machine)
1637 {
1638 	struct trace *trace = container_of(tool, struct trace, tool);
1639 	return trace__process_event(trace, machine, event, sample);
1640 }
1641 
trace__symbols_init(struct trace * trace,struct perf_evlist * evlist)1642 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1643 {
1644 	int err = symbol__init(NULL);
1645 
1646 	if (err)
1647 		return err;
1648 
1649 	trace->host = machine__new_host();
1650 	if (trace->host == NULL)
1651 		return -ENOMEM;
1652 
1653 	if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1654 		return -errno;
1655 
1656 	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1657 					    evlist->threads, trace__tool_process, false,
1658 					    trace->opts.proc_map_timeout);
1659 	if (err)
1660 		symbol__exit();
1661 
1662 	return err;
1663 }
1664 
syscall__set_arg_fmts(struct syscall * sc)1665 static int syscall__set_arg_fmts(struct syscall *sc)
1666 {
1667 	struct format_field *field;
1668 	int idx = 0;
1669 
1670 	sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1671 	if (sc->arg_scnprintf == NULL)
1672 		return -1;
1673 
1674 	if (sc->fmt)
1675 		sc->arg_parm = sc->fmt->arg_parm;
1676 
1677 	for (field = sc->args; field; field = field->next) {
1678 		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1679 			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1680 		else if (field->flags & FIELD_IS_POINTER)
1681 			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1682 		++idx;
1683 	}
1684 
1685 	return 0;
1686 }
1687 
trace__read_syscall_info(struct trace * trace,int id)1688 static int trace__read_syscall_info(struct trace *trace, int id)
1689 {
1690 	char tp_name[128];
1691 	struct syscall *sc;
1692 	const char *name = audit_syscall_to_name(id, trace->audit.machine);
1693 
1694 	if (name == NULL)
1695 		return -1;
1696 
1697 	if (id > trace->syscalls.max) {
1698 		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1699 
1700 		if (nsyscalls == NULL)
1701 			return -1;
1702 
1703 		if (trace->syscalls.max != -1) {
1704 			memset(nsyscalls + trace->syscalls.max + 1, 0,
1705 			       (id - trace->syscalls.max) * sizeof(*sc));
1706 		} else {
1707 			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1708 		}
1709 
1710 		trace->syscalls.table = nsyscalls;
1711 		trace->syscalls.max   = id;
1712 	}
1713 
1714 	sc = trace->syscalls.table + id;
1715 	sc->name = name;
1716 
1717 	sc->fmt  = syscall_fmt__find(sc->name);
1718 
1719 	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1720 	sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1721 
1722 	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1723 		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1724 		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1725 	}
1726 
1727 	if (IS_ERR(sc->tp_format))
1728 		return -1;
1729 
1730 	sc->args = sc->tp_format->format.fields;
1731 	sc->nr_args = sc->tp_format->format.nr_fields;
1732 	/* drop nr field - not relevant here; does not exist on older kernels */
1733 	if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1734 		sc->args = sc->args->next;
1735 		--sc->nr_args;
1736 	}
1737 
1738 	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1739 
1740 	return syscall__set_arg_fmts(sc);
1741 }
1742 
trace__validate_ev_qualifier(struct trace * trace)1743 static int trace__validate_ev_qualifier(struct trace *trace)
1744 {
1745 	int err = 0, i;
1746 	struct str_node *pos;
1747 
1748 	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1749 	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1750 						 sizeof(trace->ev_qualifier_ids.entries[0]));
1751 
1752 	if (trace->ev_qualifier_ids.entries == NULL) {
1753 		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1754 		       trace->output);
1755 		err = -EINVAL;
1756 		goto out;
1757 	}
1758 
1759 	i = 0;
1760 
1761 	strlist__for_each(pos, trace->ev_qualifier) {
1762 		const char *sc = pos->s;
1763 		int id = audit_name_to_syscall(sc, trace->audit.machine);
1764 
1765 		if (id < 0) {
1766 			if (err == 0) {
1767 				fputs("Error:\tInvalid syscall ", trace->output);
1768 				err = -EINVAL;
1769 			} else {
1770 				fputs(", ", trace->output);
1771 			}
1772 
1773 			fputs(sc, trace->output);
1774 		}
1775 
1776 		trace->ev_qualifier_ids.entries[i++] = id;
1777 	}
1778 
1779 	if (err < 0) {
1780 		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1781 		      "\nHint:\tand: 'man syscalls'\n", trace->output);
1782 		zfree(&trace->ev_qualifier_ids.entries);
1783 		trace->ev_qualifier_ids.nr = 0;
1784 	}
1785 out:
1786 	return err;
1787 }
1788 
1789 /*
1790  * args is to be interpreted as a series of longs but we need to handle
1791  * 8-byte unaligned accesses. args points to raw_data within the event
1792  * and raw_data is guaranteed to be 8-byte unaligned because it is
1793  * preceded by raw_size which is a u32. So we need to copy args to a temp
1794  * variable to read it. Most notably this avoids extended load instructions
1795  * on unaligned addresses
1796  */
1797 
syscall__scnprintf_args(struct syscall * sc,char * bf,size_t size,unsigned char * args,struct trace * trace,struct thread * thread)1798 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1799 				      unsigned char *args, struct trace *trace,
1800 				      struct thread *thread)
1801 {
1802 	size_t printed = 0;
1803 	unsigned char *p;
1804 	unsigned long val;
1805 
1806 	if (sc->args != NULL) {
1807 		struct format_field *field;
1808 		u8 bit = 1;
1809 		struct syscall_arg arg = {
1810 			.idx	= 0,
1811 			.mask	= 0,
1812 			.trace  = trace,
1813 			.thread = thread,
1814 		};
1815 
1816 		for (field = sc->args; field;
1817 		     field = field->next, ++arg.idx, bit <<= 1) {
1818 			if (arg.mask & bit)
1819 				continue;
1820 
1821 			/* special care for unaligned accesses */
1822 			p = args + sizeof(unsigned long) * arg.idx;
1823 			memcpy(&val, p, sizeof(val));
1824 
1825 			/*
1826  			 * Suppress this argument if its value is zero and
1827  			 * and we don't have a string associated in an
1828  			 * strarray for it.
1829  			 */
1830 			if (val == 0 &&
1831 			    !(sc->arg_scnprintf &&
1832 			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1833 			      sc->arg_parm[arg.idx]))
1834 				continue;
1835 
1836 			printed += scnprintf(bf + printed, size - printed,
1837 					     "%s%s: ", printed ? ", " : "", field->name);
1838 			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1839 				arg.val = val;
1840 				if (sc->arg_parm)
1841 					arg.parm = sc->arg_parm[arg.idx];
1842 				printed += sc->arg_scnprintf[arg.idx](bf + printed,
1843 								      size - printed, &arg);
1844 			} else {
1845 				printed += scnprintf(bf + printed, size - printed,
1846 						     "%ld", val);
1847 			}
1848 		}
1849 	} else {
1850 		int i = 0;
1851 
1852 		while (i < 6) {
1853 			/* special care for unaligned accesses */
1854 			p = args + sizeof(unsigned long) * i;
1855 			memcpy(&val, p, sizeof(val));
1856 			printed += scnprintf(bf + printed, size - printed,
1857 					     "%sarg%d: %ld",
1858 					     printed ? ", " : "", i, val);
1859 			++i;
1860 		}
1861 	}
1862 
1863 	return printed;
1864 }
1865 
/*
 * Signature shared by the per-event sample handlers in this file. A pointer
 * of this type is stored in evsel->handler and invoked with the trace state,
 * the evsel the sample belongs to, the raw event and its parsed sample.
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1869 
trace__syscall_info(struct trace * trace,struct perf_evsel * evsel,int id)1870 static struct syscall *trace__syscall_info(struct trace *trace,
1871 					   struct perf_evsel *evsel, int id)
1872 {
1873 
1874 	if (id < 0) {
1875 
1876 		/*
1877 		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1878 		 * before that, leaving at a higher verbosity level till that is
1879 		 * explained. Reproduced with plain ftrace with:
1880 		 *
1881 		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1882 		 * grep "NR -1 " /t/trace_pipe
1883 		 *
1884 		 * After generating some load on the machine.
1885  		 */
1886 		if (verbose > 1) {
1887 			static u64 n;
1888 			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1889 				id, perf_evsel__name(evsel), ++n);
1890 		}
1891 		return NULL;
1892 	}
1893 
1894 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1895 	    trace__read_syscall_info(trace, id))
1896 		goto out_cant_read;
1897 
1898 	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1899 		goto out_cant_read;
1900 
1901 	return &trace->syscalls.table[id];
1902 
1903 out_cant_read:
1904 	if (verbose) {
1905 		fprintf(trace->output, "Problems reading syscall %d", id);
1906 		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1907 			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1908 		fputs(" information\n", trace->output);
1909 	}
1910 	return NULL;
1911 }
1912 
thread__update_stats(struct thread_trace * ttrace,int id,struct perf_sample * sample)1913 static void thread__update_stats(struct thread_trace *ttrace,
1914 				 int id, struct perf_sample *sample)
1915 {
1916 	struct int_node *inode;
1917 	struct stats *stats;
1918 	u64 duration = 0;
1919 
1920 	inode = intlist__findnew(ttrace->syscall_stats, id);
1921 	if (inode == NULL)
1922 		return;
1923 
1924 	stats = inode->priv;
1925 	if (stats == NULL) {
1926 		stats = malloc(sizeof(struct stats));
1927 		if (stats == NULL)
1928 			return;
1929 		init_stats(stats);
1930 		inode->priv = stats;
1931 	}
1932 
1933 	if (ttrace->entry_time && sample->time > ttrace->entry_time)
1934 		duration = sample->time - ttrace->entry_time;
1935 
1936 	update_stats(stats, duration);
1937 }
1938 
trace__printf_interrupted_entry(struct trace * trace,struct perf_sample * sample)1939 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1940 {
1941 	struct thread_trace *ttrace;
1942 	u64 duration;
1943 	size_t printed;
1944 
1945 	if (trace->current == NULL)
1946 		return 0;
1947 
1948 	ttrace = thread__priv(trace->current);
1949 
1950 	if (!ttrace->entry_pending)
1951 		return 0;
1952 
1953 	duration = sample->time - ttrace->entry_time;
1954 
1955 	printed  = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1956 	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1957 	ttrace->entry_pending = false;
1958 
1959 	return printed;
1960 }
1961 
/*
 * Handler for the syscall entry tracepoint.
 *
 * Formats "name(args" into the thread's entry_str buffer, allocating it on
 * first use. For syscalls flagged is_exit the line is printed right away
 * (unless a duration filter or summary-only mode is active); for all others
 * it is left pending so that trace__sys_exit() can complete it. The thread
 * is then recorded as trace->current.
 *
 * Returns 0 on success, -1 when the syscall is unknown, the per-thread state
 * can't be obtained or the buffer can't be allocated.
 */
static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample)
{
	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;
	struct thread *thread;
	size_t printed;
	int err = -1;
	void *args;
	char *msg;

	if (!sc)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (!ttrace)
		goto out_put;

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);

	if (!ttrace->entry_str) {
		ttrace->entry_str = malloc(trace__entry_str_size);
		if (ttrace->entry_str == NULL)
			goto out_put;
	}

	/* flush a still pending entry before starting a new one */
	if (!trace->summary_only)
		trace__printf_interrupted_entry(trace, sample);

	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;

	printed  = scnprintf(msg, trace__entry_str_size, "%s(", sc->name);
	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
					   args, trace, thread);

	if (!sc->is_exit) {
		ttrace->entry_pending = true;
		/* See trace__vfs_getname & trace__sys_exit */
		ttrace->filename.pending_open = false;
	} else if (!trace->duration_filter && !trace->summary_only) {
		trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
		fprintf(trace->output, "%-70s\n", ttrace->entry_str);
	}

	/* trace->current holds its own reference to the thread */
	if (trace->current != thread) {
		thread__put(trace->current);
		trace->current = thread__get(thread);
	}

	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2020 
/*
 * Handler for the syscall exit tracepoint.
 *
 * Updates the per-thread statistics when a summary was requested, associates
 * a pending file name with the fd returned by a successful open, and prints
 * the completed syscall line unless it is filtered out by duration or
 * summary-only mode. The return value is shown as an errno name and message,
 * as "Timeout", in hex or as a signed decimal, depending on sc->fmt.
 *
 * Returns 0 on success, -1 when the syscall is unknown or the per-thread
 * state can't be obtained.
 */
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   union perf_event *event __maybe_unused,
			   struct perf_sample *sample)
{
	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;
	struct thread *thread;
	u64 duration = 0;
	int err = -1;
	long ret;

	if (!sc)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (!ttrace)
		goto out_put;

	if (trace->summary)
		thread__update_stats(ttrace, id, sample);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	/* a successful open: remember which path the new fd refers to */
	if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
		ttrace->filename.pending_open = false;
		++trace->stats.vfs_getname;
	}

	ttrace->exit_time = sample->time;

	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;
	} else if (trace->duration_filter) {
		/* no entry seen, so there is no duration to compare */
		goto out;
	}

	if (trace->summary_only)
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);

	if (!ttrace->entry_pending) {
		/* the entry line was already printed elsewhere */
		fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		fprintf(trace->output, "]: %s()", sc->name);
	} else {
		fprintf(trace->output, "%-70s", ttrace->entry_str);
	}

	if (sc->fmt != NULL && ret < 0 && sc->fmt->errmsg) {
		char bf[STRERR_BUFSIZE];
		const char *emsg = strerror_r(-ret, bf, sizeof(bf));
		const char *e = audit_errno_to_name(-ret);

		fprintf(trace->output, ") = -1 %s %s", e, emsg);
	} else if (sc->fmt != NULL && ret == 0 && sc->fmt->timeout) {
		fprintf(trace->output, ") = 0 Timeout");
	} else if (sc->fmt != NULL && sc->fmt->hexret) {
		fprintf(trace->output, ") = %#lx", ret);
	} else {
		/* no format description, or nothing special applies */
		fprintf(trace->output, ") = %ld", ret);
	}

	fputc('\n', trace->output);
out:
	ttrace->entry_pending = false;
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
2097 
/*
 * Handler for the probe:vfs_getname tracepoint.
 *
 * Saves the "pathname" payload in the thread's filename.name buffer, growing
 * it as needed, and flags it as a pending open so that trace__sys_exit() can
 * tie it to the returned fd. When ttrace->filename.ptr is set, the name is
 * also spliced into the already formatted entry_str at
 * filename.entry_str_pos (NOTE(review): both are presumably recorded while
 * the syscall arguments are formatted - confirm), keeping only the tail of
 * the name if it doesn't fit in the remaining space.
 *
 * Always returns 0. The thread reference taken by machine__findnew_thread()
 * is dropped on every path.
 */
static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	struct thread_trace *ttrace;
	size_t filename_len, entry_str_len, to_move;
	ssize_t remaining_space;
	char *pos;
	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");

	if (!thread)
		goto out;

	ttrace = thread__priv(thread);
	if (!ttrace)
		goto out_put;

	/* no "pathname" payload: nothing to record */
	if (filename == NULL)
		goto out_put;

	/*
	 * An empty name carries no information and, with filename.name still
	 * unallocated, would make the strcpy() below write through NULL.
	 */
	filename_len = strlen(filename);
	if (filename_len == 0)
		goto out_put;

	if (ttrace->filename.namelen < filename_len) {
		char *f = realloc(ttrace->filename.name, filename_len + 1);

		if (f == NULL)
			goto out_put;

		ttrace->filename.namelen = filename_len;
		ttrace->filename.name = f;
	}

	strcpy(ttrace->filename.name, filename);
	ttrace->filename.pending_open = true;

	if (!ttrace->filename.ptr)
		goto out_put;

	entry_str_len = strlen(ttrace->entry_str);
	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
	if (remaining_space <= 0)
		goto out_put;

	/* doesn't fit: keep the tail of the name */
	if (filename_len > (size_t)remaining_space) {
		filename += filename_len - remaining_space;
		filename_len = remaining_space;
	}

	/* open a gap at entry_str_pos and copy the name into it */
	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
	memmove(pos + filename_len, pos, to_move);
	memcpy(pos, filename, filename_len);

	ttrace->filename.ptr = 0;
	ttrace->filename.entry_str_pos = 0;
out_put:
	thread__put(thread);
out:
	return 0;
}
2154 
/*
 * Handler for sched:sched_stat_runtime.
 *
 * Adds the sample's "runtime" field, converted to milliseconds, to both the
 * per-thread and the global runtime_ms accumulators. When the per-thread
 * state can't be obtained the raw event fields are dumped to trace->output
 * instead. Always returns 0.
 */
static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
				     union perf_event *event __maybe_unused,
				     struct perf_sample *sample)
{
	u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
	struct thread *thread = machine__findnew_thread(trace->host,
							sample->pid,
							sample->tid);
	struct thread_trace *ttrace = thread__trace(thread, trace->output);

	if (ttrace != NULL) {
		ttrace->runtime_ms += runtime_ms;
		trace->runtime_ms += runtime_ms;
	} else {
		fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
		       evsel->name,
		       perf_evsel__strval(evsel, sample, "comm"),
		       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
		       runtime,
		       perf_evsel__intval(evsel, sample, "vruntime"));
	}

	thread__put(thread);
	return 0;
}
2184 
/*
 * Generic handler that prints one line per sample: a pending syscall entry
 * is flushed first, then the timestamp, the event name and, when the evsel
 * has a tracepoint format, the formatted payload. Always returns 0.
 */
static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample)
{
	FILE *fp = trace->output;

	trace__printf_interrupted_entry(trace, sample);
	trace__fprintf_tstamp(trace, sample->time, fp);

	/* placeholder for the duration column used by syscall lines */
	if (trace->trace_syscalls)
		fprintf(fp, "(         ): ");

	fprintf(fp, "%s:", evsel->name);

	if (evsel->tp_format)
		event_format__fprintf(evsel->tp_format, sample->cpu,
				      sample->raw_data, sample->raw_size, fp);

	fprintf(fp, ")\n");

	return 0;
}
2206 
print_location(FILE * f,struct perf_sample * sample,struct addr_location * al,bool print_dso,bool print_sym)2207 static void print_location(FILE *f, struct perf_sample *sample,
2208 			   struct addr_location *al,
2209 			   bool print_dso, bool print_sym)
2210 {
2211 
2212 	if ((verbose || print_dso) && al->map)
2213 		fprintf(f, "%s@", al->map->dso->long_name);
2214 
2215 	if ((verbose || print_sym) && al->sym)
2216 		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2217 			al->addr - al->sym->start);
2218 	else if (al->map)
2219 		fprintf(f, "0x%" PRIx64, al->addr);
2220 	else
2221 		fprintf(f, "0x%" PRIx64, sample->addr);
2222 }
2223 
trace__pgfault(struct trace * trace,struct perf_evsel * evsel,union perf_event * event,struct perf_sample * sample)2224 static int trace__pgfault(struct trace *trace,
2225 			  struct perf_evsel *evsel,
2226 			  union perf_event *event,
2227 			  struct perf_sample *sample)
2228 {
2229 	struct thread *thread;
2230 	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2231 	struct addr_location al;
2232 	char map_type = 'd';
2233 	struct thread_trace *ttrace;
2234 	int err = -1;
2235 
2236 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2237 	ttrace = thread__trace(thread, trace->output);
2238 	if (ttrace == NULL)
2239 		goto out_put;
2240 
2241 	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2242 		ttrace->pfmaj++;
2243 	else
2244 		ttrace->pfmin++;
2245 
2246 	if (trace->summary_only)
2247 		goto out;
2248 
2249 	thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2250 			      sample->ip, &al);
2251 
2252 	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2253 
2254 	fprintf(trace->output, "%sfault [",
2255 		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2256 		"maj" : "min");
2257 
2258 	print_location(trace->output, sample, &al, false, true);
2259 
2260 	fprintf(trace->output, "] => ");
2261 
2262 	thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2263 				   sample->addr, &al);
2264 
2265 	if (!al.map) {
2266 		thread__find_addr_location(thread, cpumode,
2267 					   MAP__FUNCTION, sample->addr, &al);
2268 
2269 		if (al.map)
2270 			map_type = 'x';
2271 		else
2272 			map_type = '?';
2273 	}
2274 
2275 	print_location(trace->output, sample, &al, true, false);
2276 
2277 	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2278 out:
2279 	err = 0;
2280 out_put:
2281 	thread__put(thread);
2282 	return err;
2283 }
2284 
skip_sample(struct trace * trace,struct perf_sample * sample)2285 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2286 {
2287 	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2288 	    (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2289 		return false;
2290 
2291 	if (trace->pid_list || trace->tid_list)
2292 		return true;
2293 
2294 	return false;
2295 }
2296 
/*
 * perf_tool sample callback (installed by trace__replay()).
 *
 * Drops samples rejected by skip_sample(), latches the first timestamp as
 * base_time unless full timestamps were requested, and dispatches to the
 * evsel's handler when it has one. The handler's result is not propagated;
 * this always returns 0.
 */
static int trace__process_sample(struct perf_tool *tool,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_evsel *evsel,
				 struct machine *machine __maybe_unused)
{
	struct trace *trace = container_of(tool, struct trace, tool);
	tracepoint_handler handler = evsel->handler;

	if (skip_sample(trace, sample))
		return 0;

	if (!trace->full_time && trace->base_time == 0)
		trace->base_time = sample->time;

	if (handler == NULL)
		return 0;

	++trace->nr_events;
	handler(trace, evsel, event, sample);

	return 0;
}
2321 
parse_target_str(struct trace * trace)2322 static int parse_target_str(struct trace *trace)
2323 {
2324 	if (trace->opts.target.pid) {
2325 		trace->pid_list = intlist__new(trace->opts.target.pid);
2326 		if (trace->pid_list == NULL) {
2327 			pr_err("Error parsing process id string\n");
2328 			return -EINVAL;
2329 		}
2330 	}
2331 
2332 	if (trace->opts.target.tid) {
2333 		trace->tid_list = intlist__new(trace->opts.target.tid);
2334 		if (trace->tid_list == NULL) {
2335 			pr_err("Error parsing thread id string\n");
2336 			return -EINVAL;
2337 		}
2338 	}
2339 
2340 	return 0;
2341 }
2342 
trace__record(struct trace * trace,int argc,const char ** argv)2343 static int trace__record(struct trace *trace, int argc, const char **argv)
2344 {
2345 	unsigned int rec_argc, i, j;
2346 	const char **rec_argv;
2347 	const char * const record_args[] = {
2348 		"record",
2349 		"-R",
2350 		"-m", "1024",
2351 		"-c", "1",
2352 	};
2353 
2354 	const char * const sc_args[] = { "-e", };
2355 	unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2356 	const char * const majpf_args[] = { "-e", "major-faults" };
2357 	unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2358 	const char * const minpf_args[] = { "-e", "minor-faults" };
2359 	unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2360 
2361 	/* +1 is for the event string below */
2362 	rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2363 		majpf_args_nr + minpf_args_nr + argc;
2364 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
2365 
2366 	if (rec_argv == NULL)
2367 		return -ENOMEM;
2368 
2369 	j = 0;
2370 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
2371 		rec_argv[j++] = record_args[i];
2372 
2373 	if (trace->trace_syscalls) {
2374 		for (i = 0; i < sc_args_nr; i++)
2375 			rec_argv[j++] = sc_args[i];
2376 
2377 		/* event string may be different for older kernels - e.g., RHEL6 */
2378 		if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2379 			rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2380 		else if (is_valid_tracepoint("syscalls:sys_enter"))
2381 			rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2382 		else {
2383 			pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2384 			return -1;
2385 		}
2386 	}
2387 
2388 	if (trace->trace_pgfaults & TRACE_PFMAJ)
2389 		for (i = 0; i < majpf_args_nr; i++)
2390 			rec_argv[j++] = majpf_args[i];
2391 
2392 	if (trace->trace_pgfaults & TRACE_PFMIN)
2393 		for (i = 0; i < minpf_args_nr; i++)
2394 			rec_argv[j++] = minpf_args[i];
2395 
2396 	for (i = 0; i < (unsigned int)argc; i++)
2397 		rec_argv[j++] = argv[i];
2398 
2399 	return cmd_record(j, rec_argv, NULL);
2400 }
2401 
/*
 * Forward declaration: trace__run() and trace__replay() call this to print
 * the per-thread summary to fp before its definition appears in the file.
 */
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2403 
perf_evlist__add_vfs_getname(struct perf_evlist * evlist)2404 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2405 {
2406 	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2407 
2408 	if (IS_ERR(evsel))
2409 		return false;
2410 
2411 	if (perf_evsel__field(evsel, "pathname") == NULL) {
2412 		perf_evsel__delete(evsel);
2413 		return false;
2414 	}
2415 
2416 	evsel->handler = trace__vfs_getname;
2417 	perf_evlist__add(evlist, evsel);
2418 	return true;
2419 }
2420 
/*
 * Add a software page fault event to evlist, sampled on every occurrence
 * (sample_period = 1) and handled by trace__pgfault(). config selects which
 * PERF_COUNT_SW_PAGE_FAULTS* counter to use.
 *
 * Returns 0 on success, -ENOMEM if the evsel can't be allocated.
 */
static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
				    u64 config)
{
	struct perf_event_attr attr = {
		.type	   = PERF_TYPE_SOFTWARE,
		.mmap_data = 1,
	};
	struct perf_evsel *evsel;

	attr.config	   = config;
	attr.sample_period = 1;

	event_attr_init(&attr);

	evsel = perf_evsel__new(&attr);
	if (evsel == NULL)
		return -ENOMEM;

	evsel->handler = trace__pgfault;
	perf_evlist__add(evlist, evsel);

	return 0;
}
2444 
trace__handle_event(struct trace * trace,union perf_event * event,struct perf_sample * sample)2445 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2446 {
2447 	const u32 type = event->header.type;
2448 	struct perf_evsel *evsel;
2449 
2450 	if (!trace->full_time && trace->base_time == 0)
2451 		trace->base_time = sample->time;
2452 
2453 	if (type != PERF_RECORD_SAMPLE) {
2454 		trace__process_event(trace, trace->host, event, sample);
2455 		return;
2456 	}
2457 
2458 	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2459 	if (evsel == NULL) {
2460 		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2461 		return;
2462 	}
2463 
2464 	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2465 	    sample->raw_data == NULL) {
2466 		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2467 		       perf_evsel__name(evsel), sample->tid,
2468 		       sample->cpu, sample->raw_size);
2469 	} else {
2470 		tracepoint_handler handler = evsel->handler;
2471 		handler(trace, evsel, event, sample);
2472 	}
2473 }
2474 
trace__add_syscall_newtp(struct trace * trace)2475 static int trace__add_syscall_newtp(struct trace *trace)
2476 {
2477 	int ret = -1;
2478 	struct perf_evlist *evlist = trace->evlist;
2479 	struct perf_evsel *sys_enter, *sys_exit;
2480 
2481 	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2482 	if (sys_enter == NULL)
2483 		goto out;
2484 
2485 	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2486 		goto out_delete_sys_enter;
2487 
2488 	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2489 	if (sys_exit == NULL)
2490 		goto out_delete_sys_enter;
2491 
2492 	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2493 		goto out_delete_sys_exit;
2494 
2495 	perf_evlist__add(evlist, sys_enter);
2496 	perf_evlist__add(evlist, sys_exit);
2497 
2498 	trace->syscalls.events.sys_enter = sys_enter;
2499 	trace->syscalls.events.sys_exit  = sys_exit;
2500 
2501 	ret = 0;
2502 out:
2503 	return ret;
2504 
2505 out_delete_sys_exit:
2506 	perf_evsel__delete_priv(sys_exit);
2507 out_delete_sys_enter:
2508 	perf_evsel__delete_priv(sys_enter);
2509 	goto out;
2510 }
2511 
trace__set_ev_qualifier_filter(struct trace * trace)2512 static int trace__set_ev_qualifier_filter(struct trace *trace)
2513 {
2514 	int err = -1;
2515 	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2516 						trace->ev_qualifier_ids.nr,
2517 						trace->ev_qualifier_ids.entries);
2518 
2519 	if (filter == NULL)
2520 		goto out_enomem;
2521 
2522 	if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2523 		err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2524 
2525 	free(filter);
2526 out:
2527 	return err;
2528 out_enomem:
2529 	errno = ENOMEM;
2530 	goto out;
2531 }
2532 
trace__run(struct trace * trace,int argc,const char ** argv)2533 static int trace__run(struct trace *trace, int argc, const char **argv)
2534 {
2535 	struct perf_evlist *evlist = trace->evlist;
2536 	struct perf_evsel *evsel;
2537 	int err = -1, i;
2538 	unsigned long before;
2539 	const bool forks = argc > 0;
2540 	bool draining = false;
2541 
2542 	trace->live = true;
2543 
2544 	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2545 		goto out_error_raw_syscalls;
2546 
2547 	if (trace->trace_syscalls)
2548 		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2549 
2550 	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2551 	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2552 		goto out_error_mem;
2553 	}
2554 
2555 	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2556 	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2557 		goto out_error_mem;
2558 
2559 	if (trace->sched &&
2560 	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2561 				   trace__sched_stat_runtime))
2562 		goto out_error_sched_stat_runtime;
2563 
2564 	err = perf_evlist__create_maps(evlist, &trace->opts.target);
2565 	if (err < 0) {
2566 		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2567 		goto out_delete_evlist;
2568 	}
2569 
2570 	err = trace__symbols_init(trace, evlist);
2571 	if (err < 0) {
2572 		fprintf(trace->output, "Problems initializing symbol libraries!\n");
2573 		goto out_delete_evlist;
2574 	}
2575 
2576 	perf_evlist__config(evlist, &trace->opts);
2577 
2578 	signal(SIGCHLD, sig_handler);
2579 	signal(SIGINT, sig_handler);
2580 
2581 	if (forks) {
2582 		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2583 						    argv, false, NULL);
2584 		if (err < 0) {
2585 			fprintf(trace->output, "Couldn't run the workload!\n");
2586 			goto out_delete_evlist;
2587 		}
2588 	}
2589 
2590 	err = perf_evlist__open(evlist);
2591 	if (err < 0)
2592 		goto out_error_open;
2593 
2594 	/*
2595 	 * Better not use !target__has_task() here because we need to cover the
2596 	 * case where no threads were specified in the command line, but a
2597 	 * workload was, and in that case we will fill in the thread_map when
2598 	 * we fork the workload in perf_evlist__prepare_workload.
2599 	 */
2600 	if (trace->filter_pids.nr > 0)
2601 		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2602 	else if (thread_map__pid(evlist->threads, 0) == -1)
2603 		err = perf_evlist__set_filter_pid(evlist, getpid());
2604 
2605 	if (err < 0)
2606 		goto out_error_mem;
2607 
2608 	if (trace->ev_qualifier_ids.nr > 0) {
2609 		err = trace__set_ev_qualifier_filter(trace);
2610 		if (err < 0)
2611 			goto out_errno;
2612 
2613 		pr_debug("event qualifier tracepoint filter: %s\n",
2614 			 trace->syscalls.events.sys_exit->filter);
2615 	}
2616 
2617 	err = perf_evlist__apply_filters(evlist, &evsel);
2618 	if (err < 0)
2619 		goto out_error_apply_filters;
2620 
2621 	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2622 	if (err < 0)
2623 		goto out_error_mmap;
2624 
2625 	if (!target__none(&trace->opts.target))
2626 		perf_evlist__enable(evlist);
2627 
2628 	if (forks)
2629 		perf_evlist__start_workload(evlist);
2630 
2631 	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2632 				  evlist->threads->nr > 1 ||
2633 				  perf_evlist__first(evlist)->attr.inherit;
2634 again:
2635 	before = trace->nr_events;
2636 
2637 	for (i = 0; i < evlist->nr_mmaps; i++) {
2638 		union perf_event *event;
2639 
2640 		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2641 			struct perf_sample sample;
2642 
2643 			++trace->nr_events;
2644 
2645 			err = perf_evlist__parse_sample(evlist, event, &sample);
2646 			if (err) {
2647 				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2648 				goto next_event;
2649 			}
2650 
2651 			trace__handle_event(trace, event, &sample);
2652 next_event:
2653 			perf_evlist__mmap_consume(evlist, i);
2654 
2655 			if (interrupted)
2656 				goto out_disable;
2657 
2658 			if (done && !draining) {
2659 				perf_evlist__disable(evlist);
2660 				draining = true;
2661 			}
2662 		}
2663 	}
2664 
2665 	if (trace->nr_events == before) {
2666 		int timeout = done ? 100 : -1;
2667 
2668 		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2669 			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2670 				draining = true;
2671 
2672 			goto again;
2673 		}
2674 	} else {
2675 		goto again;
2676 	}
2677 
2678 out_disable:
2679 	thread__zput(trace->current);
2680 
2681 	perf_evlist__disable(evlist);
2682 
2683 	if (!err) {
2684 		if (trace->summary)
2685 			trace__fprintf_thread_summary(trace, trace->output);
2686 
2687 		if (trace->show_tool_stats) {
2688 			fprintf(trace->output, "Stats:\n "
2689 					       " vfs_getname : %" PRIu64 "\n"
2690 					       " proc_getname: %" PRIu64 "\n",
2691 				trace->stats.vfs_getname,
2692 				trace->stats.proc_getname);
2693 		}
2694 	}
2695 
2696 out_delete_evlist:
2697 	perf_evlist__delete(evlist);
2698 	trace->evlist = NULL;
2699 	trace->live = false;
2700 	return err;
2701 {
2702 	char errbuf[BUFSIZ];
2703 
2704 out_error_sched_stat_runtime:
2705 	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2706 	goto out_error;
2707 
2708 out_error_raw_syscalls:
2709 	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2710 	goto out_error;
2711 
2712 out_error_mmap:
2713 	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2714 	goto out_error;
2715 
2716 out_error_open:
2717 	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2718 
2719 out_error:
2720 	fprintf(trace->output, "%s\n", errbuf);
2721 	goto out_delete_evlist;
2722 
2723 out_error_apply_filters:
2724 	fprintf(trace->output,
2725 		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
2726 		evsel->filter, perf_evsel__name(evsel), errno,
2727 		strerror_r(errno, errbuf, sizeof(errbuf)));
2728 	goto out_delete_evlist;
2729 }
2730 out_error_mem:
2731 	fprintf(trace->output, "Not enough memory to run!\n");
2732 	goto out_delete_evlist;
2733 
2734 out_errno:
2735 	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2736 	goto out_delete_evlist;
2737 }
2738 
trace__replay(struct trace * trace)2739 static int trace__replay(struct trace *trace)
2740 {
2741 	const struct perf_evsel_str_handler handlers[] = {
2742 		{ "probe:vfs_getname",	     trace__vfs_getname, },
2743 	};
2744 	struct perf_data_file file = {
2745 		.path  = input_name,
2746 		.mode  = PERF_DATA_MODE_READ,
2747 		.force = trace->force,
2748 	};
2749 	struct perf_session *session;
2750 	struct perf_evsel *evsel;
2751 	int err = -1;
2752 
2753 	trace->tool.sample	  = trace__process_sample;
2754 	trace->tool.mmap	  = perf_event__process_mmap;
2755 	trace->tool.mmap2	  = perf_event__process_mmap2;
2756 	trace->tool.comm	  = perf_event__process_comm;
2757 	trace->tool.exit	  = perf_event__process_exit;
2758 	trace->tool.fork	  = perf_event__process_fork;
2759 	trace->tool.attr	  = perf_event__process_attr;
2760 	trace->tool.tracing_data = perf_event__process_tracing_data;
2761 	trace->tool.build_id	  = perf_event__process_build_id;
2762 
2763 	trace->tool.ordered_events = true;
2764 	trace->tool.ordering_requires_timestamps = true;
2765 
2766 	/* add tid to output */
2767 	trace->multiple_threads = true;
2768 
2769 	session = perf_session__new(&file, false, &trace->tool);
2770 	if (session == NULL)
2771 		return -1;
2772 
2773 	if (symbol__init(&session->header.env) < 0)
2774 		goto out;
2775 
2776 	trace->host = &session->machines.host;
2777 
2778 	err = perf_session__set_tracepoints_handlers(session, handlers);
2779 	if (err)
2780 		goto out;
2781 
2782 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2783 						     "raw_syscalls:sys_enter");
2784 	/* older kernels have syscalls tp versus raw_syscalls */
2785 	if (evsel == NULL)
2786 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2787 							     "syscalls:sys_enter");
2788 
2789 	if (evsel &&
2790 	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2791 	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2792 		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2793 		goto out;
2794 	}
2795 
2796 	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2797 						     "raw_syscalls:sys_exit");
2798 	if (evsel == NULL)
2799 		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2800 							     "syscalls:sys_exit");
2801 	if (evsel &&
2802 	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2803 	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2804 		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2805 		goto out;
2806 	}
2807 
2808 	evlist__for_each(session->evlist, evsel) {
2809 		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2810 		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2811 		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2812 		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2813 			evsel->handler = trace__pgfault;
2814 	}
2815 
2816 	err = parse_target_str(trace);
2817 	if (err != 0)
2818 		goto out;
2819 
2820 	setup_pager();
2821 
2822 	err = perf_session__process_events(session);
2823 	if (err)
2824 		pr_err("Failed to process events, error %d", err);
2825 
2826 	else if (trace->summary)
2827 		trace__fprintf_thread_summary(trace, trace->output);
2828 
2829 out:
2830 	perf_session__delete(session);
2831 
2832 	return err;
2833 }
2834 
/*
 * Emit the banner that introduces the per-thread summary section.
 *
 * Returns whatever fprintf() reported for the banner, converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2843 
thread__dump_stats(struct thread_trace * ttrace,struct trace * trace,FILE * fp)2844 static size_t thread__dump_stats(struct thread_trace *ttrace,
2845 				 struct trace *trace, FILE *fp)
2846 {
2847 	struct stats *stats;
2848 	size_t printed = 0;
2849 	struct syscall *sc;
2850 	struct int_node *inode = intlist__first(ttrace->syscall_stats);
2851 
2852 	if (inode == NULL)
2853 		return 0;
2854 
2855 	printed += fprintf(fp, "\n");
2856 
2857 	printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2858 	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2859 	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2860 
2861 	/* each int_node is a syscall */
2862 	while (inode) {
2863 		stats = inode->priv;
2864 		if (stats) {
2865 			double min = (double)(stats->min) / NSEC_PER_MSEC;
2866 			double max = (double)(stats->max) / NSEC_PER_MSEC;
2867 			double avg = avg_stats(stats);
2868 			double pct;
2869 			u64 n = (u64) stats->n;
2870 
2871 			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2872 			avg /= NSEC_PER_MSEC;
2873 
2874 			sc = &trace->syscalls.table[inode->i];
2875 			printed += fprintf(fp, "   %-15s", sc->name);
2876 			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2877 					   n, avg * n, min, avg);
2878 			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2879 		}
2880 
2881 		inode = intlist__next(inode);
2882 	}
2883 
2884 	printed += fprintf(fp, "\n\n");
2885 
2886 	return printed;
2887 }
2888 
/*
 * Context handed, as the opaque priv pointer, to the per-thread summary
 * callback.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* trace session being summarized */
	size_t printed;		/* running count of what was printed */
};
2895 
trace__fprintf_one_thread(struct thread * thread,void * priv)2896 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2897 {
2898 	struct summary_data *data = priv;
2899 	FILE *fp = data->fp;
2900 	size_t printed = data->printed;
2901 	struct trace *trace = data->trace;
2902 	struct thread_trace *ttrace = thread__priv(thread);
2903 	double ratio;
2904 
2905 	if (ttrace == NULL)
2906 		return 0;
2907 
2908 	ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2909 
2910 	printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2911 	printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2912 	printed += fprintf(fp, "%.1f%%", ratio);
2913 	if (ttrace->pfmaj)
2914 		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2915 	if (ttrace->pfmin)
2916 		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2917 	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2918 	printed += thread__dump_stats(ttrace, trace, fp);
2919 
2920 	data->printed += printed;
2921 
2922 	return 0;
2923 }
2924 
trace__fprintf_thread_summary(struct trace * trace,FILE * fp)2925 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2926 {
2927 	struct summary_data data = {
2928 		.fp = fp,
2929 		.trace = trace
2930 	};
2931 	data.printed = trace__fprintf_threads_header(fp);
2932 
2933 	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2934 
2935 	return data.printed;
2936 }
2937 
/*
 * Option callback for --duration: store the minimum duration filter.
 *
 * @opt: option descriptor; opt->value points to the struct trace to update.
 * @str: user supplied threshold, parsed as a floating point number.
 *
 * Returns 0 on success, -1 when @str does not start with a number.  atof()
 * would silently turn such input into 0.0; strtod() lets it be rejected.
 * Text following a valid number is still tolerated, as it was with atof().
 */
static int trace__set_duration(const struct option *opt, const char *str,
			       int unset __maybe_unused)
{
	struct trace *trace = opt->value;
	char *end;
	double duration = strtod(str, &end);

	/* no characters consumed: there was no number to parse */
	if (end == str) {
		pr_err("Invalid duration: %s\n", str);
		return -1;
	}

	trace->duration_filter = duration;
	return 0;
}
2946 
/*
 * Option callback for --filter-pids: build the array of pids to filter.
 *
 * @opt: option descriptor; opt->value points to the struct trace to update.
 * @str: CSV list of pids.
 *
 * Slot 0 of the resulting array always holds this process' own pid, the
 * user supplied pids follow.  Returns 0 on success and -1 on parse or
 * allocation failure; on failure filter_pids.nr is reset to 0 so it never
 * describes a NULL entries array.
 */
static int trace__set_filter_pids(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	int ret = -1;
	size_t i;
	struct trace *trace = opt->value;
	/*
	 * FIXME: introduce a intarray class, plain parse csv and create a
	 * { int nr, int entries[] } struct...
	 */
	struct intlist *list = intlist__new(str);

	if (list == NULL)
		return -1;

	/* one extra slot for our own pid */
	i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
	trace->filter_pids.entries = calloc(i, sizeof(pid_t));

	if (trace->filter_pids.entries == NULL) {
		trace->filter_pids.nr = 0;
		goto out_delete;
	}

	trace->filter_pids.entries[0] = getpid();

	for (i = 1; i < trace->filter_pids.nr; ++i)
		trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;

	ret = 0;
out_delete:
	/* the list is only a parsing aid, release it on every path */
	intlist__delete(list);
	return ret;
}
2978 
trace__open_output(struct trace * trace,const char * filename)2979 static int trace__open_output(struct trace *trace, const char *filename)
2980 {
2981 	struct stat st;
2982 
2983 	if (!stat(filename, &st) && st.st_size) {
2984 		char oldname[PATH_MAX];
2985 
2986 		scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2987 		unlink(oldname);
2988 		rename(filename, oldname);
2989 	}
2990 
2991 	trace->output = fopen(filename, "w");
2992 
2993 	return trace->output == NULL ? -errno : 0;
2994 }
2995 
/*
 * Option callback for -F/--pf: select which page faults get traced.
 *
 * @opt: option descriptor; opt->value points to the int bitmask to update.
 * @str: "all", "maj" or "min".
 *
 * The selected TRACE_PF* bits are OR-ed into the mask, so repeated use of
 * the option accumulates.  Returns 0 on success, -1 for any other string.
 */
static int parse_pagefaults(const struct option *opt, const char *str,
			    int unset __maybe_unused)
{
	int *trace_pgfaults = opt->value;
	int mask;

	if (!strcmp(str, "all"))
		mask = TRACE_PFMAJ | TRACE_PFMIN;
	else if (!strcmp(str, "maj"))
		mask = TRACE_PFMAJ;
	else if (!strcmp(str, "min"))
		mask = TRACE_PFMIN;
	else
		return -1;

	*trace_pgfaults |= mask;

	return 0;
}
3012 
evlist__set_evsel_handler(struct perf_evlist * evlist,void * handler)3013 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3014 {
3015 	struct perf_evsel *evsel;
3016 
3017 	evlist__for_each(evlist, evsel)
3018 		evsel->handler = handler;
3019 }
3020 
/*
 * Entry point of 'perf trace'.
 *
 * Parses the command line, then either hands over to trace__record() for
 * the "record" subcommand, replays a previously recorded file when an input
 * name was given, or traces live via trace__run().
 *
 * @argc/@argv: command line; parsing stops at the first non-option so that
 *              a workload command and its own options can follow.
 *
 * Returns 0 on success, a negative value on error.
 */
int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char *trace_usage[] = {
		"perf trace [<options>] [<command>]",
		"perf trace [<options>] -- <command> [<options>]",
		"perf trace record [<options>] [<command>]",
		"perf trace record [<options>] -- <command> [<options>]",
		NULL
	};
	struct trace trace = {
		.audit = {
			.machine = audit_detect_machine(),
		},
		.syscalls = {
			. max = -1,
		},
		.opts = {
			.target = {
				.uid	   = UINT_MAX,
				.uses_mmap = true,
			},
			.user_freq     = UINT_MAX,
			.user_interval = ULLONG_MAX,
			.no_buffering  = true,
			.mmap_pages    = UINT_MAX,
			.proc_map_timeout  = 500,
		},
		.output = stderr,
		.show_comm = true,
		.trace_syscalls = true,
	};
	const char *output_name = NULL;
	const char *ev_qualifier_str = NULL;
	const struct option trace_options[] = {
	OPT_CALLBACK(0, "event", &trace.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_BOOLEAN(0, "comm", &trace.show_comm,
		    "show the thread COMM next to its id"),
	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
		    "trace events on existing process id"),
	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
		    "trace events on existing thread id"),
	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
		     "pids to filter (by the kernel)", trace__set_filter_pids),
	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
		   "user to profile"),
	OPT_CALLBACK(0, "duration", &trace, "float",
		     "show only events with duration > N.M ms",
		     trace__set_duration),
	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
	OPT_BOOLEAN('T', "time", &trace.full_time,
		    "Show full timestamp, not time relative to first start"),
	OPT_BOOLEAN('s', "summary", &trace.summary_only,
		    "Show only syscall summary with statistics"),
	OPT_BOOLEAN('S', "with-summary", &trace.summary,
		    "Show all syscalls and summary with statistics"),
	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
		     "Trace pagefaults", parse_pagefaults, "maj"),
	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_END()
	};
	const char * const trace_subcommands[] = { "record", NULL };
	int err;
	char bf[BUFSIZ];

	/*
	 * Resolved here rather than in the initializer of 'trace': the
	 * expressions of an initializer list are indeterminately sequenced,
	 * so reading trace.audit.machine from within that list is not
	 * guaranteed to see the value stored by .machine.
	 */
	trace.audit.open_id = audit_name_to_syscall("open", trace.audit.machine);

	signal(SIGSEGV, sighandler_dump_stack);
	signal(SIGFPE, sighandler_dump_stack);

	trace.evlist = perf_evlist__new();

	if (trace.evlist == NULL) {
		pr_err("Not enough memory to run!\n");
		err = -ENOMEM;
		goto out;
	}

	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);

	/* page fault tracing needs the address and time in each sample */
	if (trace.trace_pgfaults) {
		trace.opts.sample_address = true;
		trace.opts.sample_time = true;
	}

	/* events added through --event all share the generic handler */
	if (trace.evlist->nr_entries > 0)
		evlist__set_evsel_handler(trace.evlist, trace__event_handler);

	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
		return trace__record(&trace, argc-1, &argv[1]);

	/* summary_only implies summary option, but don't overwrite summary if set */
	if (trace.summary_only)
		trace.summary = trace.summary_only;

	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
		pr_err("Please specify something to trace.\n");
		return -1;
	}

	if (output_name != NULL) {
		err = trace__open_output(&trace, output_name);
		if (err < 0) {
			perror("failed to create output file");
			goto out;
		}
	}

	if (ev_qualifier_str != NULL) {
		const char *s = ev_qualifier_str;
		struct strlist_config slist_config = {
			.dirname = system_path(STRACE_GROUPS_DIR),
		};

		/* a leading '!' negates the qualifier list */
		trace.not_ev_qualifier = *s == '!';
		if (trace.not_ev_qualifier)
			++s;
		trace.ev_qualifier = strlist__new(s, &slist_config);
		if (trace.ev_qualifier == NULL) {
			fputs("Not enough memory to parse event qualifier\n",
			      trace.output);
			err = -ENOMEM;
			goto out_close;
		}

		err = trace__validate_ev_qualifier(&trace);
		if (err)
			goto out_close;
	}

	err = target__validate(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	err = target__parse_uid(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	/* no workload and no explicit target: trace the whole system */
	if (!argc && target__none(&trace.opts.target))
		trace.opts.target.system_wide = true;

	if (input_name)
		err = trace__replay(&trace);
	else
		err = trace__run(&trace, argc, argv);

out_close:
	/* only close the stream if it was opened here; the default is stderr */
	if (output_name != NULL)
		fclose(trace.output);
out:
	return err;
}
3198