/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>


struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data_file	file;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_cache;
	long			samples;
};

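/*
 * Append raw bytes to the perf.data output file, accounting for them in
 * rec->bytes_written, which the final summary reports.
 */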
static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;
	return 0;
}

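/*
 * perf_tool callback: events synthesized in userspace (mmaps, comms, etc.)
 * are routed straight into the output file.
 */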
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}

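/*
 * Drain one mmap'ed ring buffer into the output file. The kernel writes
 * events between our last read position (md->prev) and the current head;
 * when that range wraps past the end of the buffer, it is copied out in
 * two chunks.
 */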
static int record__mmap_read(struct record *rec, int idx)
{
	struct perf_mmap *md = &rec->evlist->mmap[idx];
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	perf_evlist__mmap_consume(rec->evlist, idx);
out:
	return rc;
}

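/* Flags set from signal context and polled by the main record loop. */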
static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

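/*
 * Open a counter for each event in the evlist, falling back to a compatible
 * alternative when the kernel rejects one, then apply filters and mmap the
 * resulting fds.
 */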
static int record__open(struct record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	evlist__for_each(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u)\n", opts->mmap_pages);
			rc = -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				strerror_r(errno, msg, sizeof(msg)));
			rc = -errno;
		}
		goto out;
	}

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

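/*
 * Re-read the events just written so that build-ids can be recorded for the
 * DSOs that actually got hits; the current file offset marks the end of the
 * data.
 */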
static int process_buildids(struct record *rec)
{
	struct perf_data_file *file  = &rec->file;
	struct perf_session *session = rec->session;
	u64 start = session->header.data_offset;

	u64 size = lseek(file->fd, 0, SEEK_CUR);
	if (size == 0)
		return 0;

	return __perf_session__process_events(session, start,
					      size - start,
					      size, &build_id__mark_dso_hit_ops);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel, when processing the record and report
	 * subcommands we arrange the module mmaps prior to the guest kernel
	 * mmap and trigger a preload of the DSO, because by default guest
	 * module symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This avoids missing symbols when the first
	 * address falls in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

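/*
 * PERF_RECORD_FINISHED_ROUND marks a flush point: everything written before
 * it can be sorted by timestamp without waiting for the rest of the file,
 * letting perf report reorder samples in bounded memory.
 */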
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

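/* Make one pass over all mmap'ed ring buffers, draining each into the file. */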
static int record__mmap_read_all(struct record *rec)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		if (rec->evlist->mmap[i].base) {
			if (record__mmap_read(rec, i) != 0) {
				rc = -1;
				goto out;
			}
		}
	}

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

out:
	return rc;
}

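/*
 * Start with every header feature enabled, then clear the ones this session
 * cannot or should not provide.
 */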
static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails,
 * since we asked for that by setting its want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

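/*
 * The heart of 'perf record': set up the session and output file, synthesize
 * the initial machine state (kernel mmap, modules, threads), then loop
 * draining the ring buffers until the workload exits or we are interrupted.
 */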
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false, draining = false;

	rec->progname = argv[0];

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);

	session = perf_session__new(file, false, NULL);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, file->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (file->is_pipe) {
		err = perf_header__write_pipe(file->fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist,
						 file->fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	machine = &session->machines.host;

	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_child;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints, so it's not really
			 * an error, just that we don't need to
			 * synthesize anything. We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, file->fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_child;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address);
	if (err != 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(rec->evlist);

	if (opts->initial_delay) {
		usleep(opts->initial_delay * 1000);
		perf_evlist__enable(rec->evlist);
	}

	for (;;) {
		int hits = rec->samples;

		if (record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_child;
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate an error only if there really is one: a
			 * positive number of returned events or an EINTR is
			 * not an error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet) {
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

		/*
		 * Approximate RIP event size: 24 bytes.
		 */
		fprintf(stderr,
			"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
			(double)rec->bytes_written / 1024.0 / 1024.0,
			file->path,
			rec->bytes_written / 24);
	}

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	if (!err && !file->is_pipe) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   file->fd, true);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

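/* Maps the -j/--branch-filter token names onto PERF_SAMPLE_BRANCH_* bits. */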
static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
	BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
	BRANCH_END
};

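/*
 * Parse a comma-separated list of branch filter names into a sampling mask;
 * if only privilege-level bits end up set, default to sampling any branch.
 */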
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}

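/* With -v, report which call-graph mode (and DWARF dump size) was selected. */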
static void callchain_debug(void)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" };

	pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);

	if (callchain_param.record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain_param.dump_size);
}

int record_parse_callchain_opt(const struct option *opt __maybe_unused,
			       const char *arg,
			       int unset)
{
	int ret;

	callchain_param.enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain_param.record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg);
	if (!ret)
		callchain_debug();

	return ret;
}

int record_callchain_opt(const struct option *opt __maybe_unused,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	callchain_param.enabled = true;

	if (callchain_param.record_mode == CALLCHAIN_NONE)
		callchain_param.record_mode = CALLCHAIN_FP;

	callchain_debug();
	return 0;
}

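/* Map the legacy 'record.call-graph' config key onto 'call-graph.record-mode'. */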
static int perf_record_config(const char *var, const char *value, void *cb)
{
	if (!strcmp(var, "record.call-graph"))
		var = "call-graph.record-mode"; /* fall-through */

	return perf_default_config(var, value, cb);
}

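/*
 * Usage strings printed by usage_with_options(); a typical invocation
 * looks like: perf record -e cycles -g -- ./workload
 */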
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/*
 * XXX Ideally this would be local to cmd_record() and passed to a record__new
 * because we need access to it in record__exit, which is called after
 * cmd_record() exits, but since record_options needs to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
	},
};

#define CALLCHAIN_HELP "set up and enable call-graph (stack chain/backtrace) recording: "

#ifdef HAVE_DWARF_UNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
#endif

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to using the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'
 * using pipes, etc.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "mode[,dump_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_END()
};

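/*
 * Entry point for 'perf record': parse options, validate the target, create
 * the CPU/thread maps, then hand off to __cmd_record().
 */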
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	perf_config(perf_record_config, rec);

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init(NULL);

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (rec->evlist->nr_entries == 0 &&
	    perf_evlist__add_default(rec->evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_symbol_exit;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out_symbol_exit;
	}

	err = __cmd_record(&record, argc, argv);
out_symbol_exit:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	return err;
}