/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#define _FILE_OFFSET_BITS 64

#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

enum write_mode_t {
	WRITE_FORCE,
	WRITE_APPEND
};

struct perf_record {
	struct perf_tool	tool;
	struct perf_record_opts	opts;
	u64			bytes_written;
	const char		*output_name;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	const char		*uid_str;
	int			output;
	unsigned int		page_size;
	int			realtime_prio;
	enum write_mode_t	write_mode;
	bool			no_buildid;
	bool			no_buildid_cache;
	bool			force;
	bool			file_new;
	bool			append_file;
	long			samples;
	off_t			post_processing_offset;
};

static void advance_output(struct perf_record *rec, size_t size)
{
	rec->bytes_written += size;
}

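/*
 * write(2) may return a short count; loop until the whole buffer has been
 * flushed to the output file, accounting every byte written.
 */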
static void write_output(struct perf_record *rec, void *buf, size_t size)
{
	while (size) {
		int ret = write(rec->output, buf, size);

		if (ret < 0)
			die("failed to write");

		size -= ret;
		buf += ret;

		rec->bytes_written += ret;
	}
}

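/*
 * Synthesized events (records built in user space, such as the kernel mmap
 * and thread map events emitted in __cmd_record) do not pass through the
 * kernel mmap buffers; they are written straight to the output file.
 */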
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __used,
				     struct machine *machine __used)
{
	struct perf_record *rec = container_of(tool, struct perf_record, tool);
	write_output(rec, event, event->header.size);
	return 0;
}

static void perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + rec->page_size;
	unsigned long size;
	void *buf;

	if (old == head)
		return;

	rec->samples++;

	size = head - old;

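	/*
	 * The sample area is a circular buffer of mask + 1 bytes that
	 * starts one page past the mmap base (the first page is the
	 * control header).  If the [old, head) region wraps past the end
	 * of the buffer, write out the tail chunk first, then fall
	 * through below for the remainder at the start of the buffer.
	 */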
	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		write_output(rec, buf, size);
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	write_output(rec, buf, size);

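	/*
	 * Publish the new tail position so the kernel knows this data has
	 * been consumed and the space can be reused.
	 */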
	md->prev = old;
	perf_mmap__write_tail(md, old);
}

static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}

static void perf_record__sig_exit(int exit_status __used, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

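	/*
	 * Re-raise a fatal signal with its default disposition so that our
	 * exit status reflects the real cause of death.  SIGUSR1, which the
	 * forked workload uses to report an exec failure, is deliberately
	 * not re-raised.
	 */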
	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}

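/*
 * Two event lists are considered equal when they have the same number of
 * entries and each positional pair of perf_event_attr structs compares
 * byte-for-byte identical.
 */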
static bool perf_evlist__equal(struct perf_evlist *evlist,
			       struct perf_evlist *other)
{
	struct perf_evsel *pos, *pair;

	if (evlist->nr_entries != other->nr_entries)
		return false;

	pair = list_entry(other->entries.next, struct perf_evsel, node);

	list_for_each_entry(pos, &evlist->entries, node) {
		if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr)) != 0)
			return false;
		pair = list_entry(pair->node.next, struct perf_evsel, node);
	}

	return true;
}

static void perf_record__open(struct perf_record *rec)
{
	struct perf_evsel *pos, *first;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;

	first = list_entry(evlist->entries.next, struct perf_evsel, node);

	perf_evlist__config_attrs(evlist, opts);

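	/*
	 * Older kernels reject perf_event_attr fields they do not know
	 * about, so opening can fail with EINVAL.  The labels below form a
	 * fallback ladder: first drop exclude_guest/exclude_host, then
	 * sample_id_all, and as a last resort replace the hardware cycles
	 * event with the software cpu-clock event.
	 */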
	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		struct xyarray *group_fd = NULL;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but a short-term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

		if (opts->group && pos != first)
			group_fd = first->fd;
fallback_missing_features:
		if (opts->exclude_guest_missing)
			attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
		attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
				     opts->group, group_fd) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__error_paranoid();
				exit(EXIT_FAILURE);
			} else if (err == ENODEV && opts->cpu_list) {
				die("No such device - did you specify"
				    " an out-of-range profile CPU?\n");
			} else if (err == EINVAL) {
				if (!opts->exclude_guest_missing &&
				    (attr->exclude_guest || attr->exclude_host)) {
					pr_debug("Old kernel, cannot exclude "
						 "guest or host samples.\n");
					opts->exclude_guest_missing = true;
					goto fallback_missing_features;
				} else if (!opts->sample_id_all_missing) {
					/*
					 * Old kernel, no attr->sample_id_all field
					 */
					opts->sample_id_all_missing = true;
					if (!opts->sample_time && !opts->raw_samples && !time_needed)
						attr->sample_type &= ~PERF_SAMPLE_TIME;

					goto retry_sample_id;
				}
			}

			/*
			 * If it's cycles then fall back to the hrtimer-based
			 * cpu-clock-tick sw counter, which is always available
			 * even without PMU support:
			 */
			if (attr->type == PERF_TYPE_HARDWARE
			    && attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				goto try_again;
			}

			if (err == ENOENT) {
				ui__warning("The %s event is not supported.\n",
					    event_name(pos));
				exit(EXIT_FAILURE);
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
			      err, strerror(err));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
				die("No hardware sampling interrupt available."
				    " No APIC? If so then you can boot the kernel"
				    " with the \"lapic\" boot parameter to"
				    " force-enable it.\n");
#endif

			die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
		}
	}

	if (perf_evlist__set_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
		      strerror(errno));
		exit(-1);
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM)
			die("Permission error mapping pages.\n"
			    "Consider increasing "
			    "/proc/sys/kernel/perf_event_mlock_kb,\n"
			    "or try again with a smaller value of -m/--mmap_pages.\n"
			    "(current value: %d)\n", opts->mmap_pages);
		else if (!is_power_of_2(opts->mmap_pages))
			die("--mmap_pages/-m value must be a power of two.");

		die("failed to mmap with %d (%s)\n", errno, strerror(errno));
	}

	if (rec->file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			exit(-1);
		}
	}

	perf_session__update_sample_type(session);
}

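/*
 * Re-scan everything written after the header (from post_processing_offset
 * to the current end of the file) with build_id__mark_dso_hit_ops, so that
 * only DSOs that actually got samples end up with build-ids in the header.
 */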
static int process_buildids(struct perf_record *rec)
{
	u64 size = lseek(rec->output, 0, SEEK_CUR);

	if (size == 0)
		return 0;

	rec->session->fd = rec->output;
	return __perf_session__process_events(rec->session, rec->post_processing_offset,
					      size - rec->post_processing_offset,
					      size, &build_id__mark_dso_hit_ops);
}

static void perf_record__exit(int status __used, void *arg)
{
	struct perf_record *rec = arg;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;

	if (machine__is_host(machine))
		return;

	/*
	 * For guest kernels, when processing the record & report
	 * subcommands, we arrange the module mmap prior to the guest
	 * kernel mmap and trigger a preload of the dso, because default
	 * guest module symbols are loaded from guest kallsyms instead
	 * of /lib/modules/XXX/XXX.  This avoids missing symbols when
	 * the first address falls in a module rather than in the guest
	 * kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * Fall back to _stext because a guest kernel's /proc/kallsyms
	 * sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

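/*
 * PERF_RECORD_FINISHED_ROUND is a synthetic marker emitted after each pass
 * over all mmap buffers.  Report-side tools can use it as a flush point to
 * re-sort events that were written out of timestamp order across buffers.
 */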
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static void perf_record__mmap_read_all(struct perf_record *rec)
{
	int i;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		if (rec->evlist->mmap[i].base)
			perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
	}

	if (perf_header__has_feat(&rec->session->header, HEADER_TRACE_INFO))
		write_output(rec, &finished_round_event, sizeof(finished_round_event));
}

static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output, feat;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			if (rec->write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (rec->write_mode == WRITE_APPEND) {
			rec->write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (rec->write_mode == WRITE_APPEND)
		rec->file_new = 0;
	else
		flags |= O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		exit(-1);
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    rec->write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

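	/*
	 * Start with every header feature enabled, then clear the ones that
	 * do not apply to this session.
	 */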
	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&evsel_list->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACE_INFO);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, opts, argv);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	perf_record__open(rec);

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			return err;
	} else if (rec->file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			return err;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		return -1;
	}

	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		return -1;
	}

	if (opts->pipe_output) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			return err;
		}

		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
							 machine);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			return err;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				return err;
			}
			advance_output(rec, err);
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session, tool,
					       perf_event__synthesize_guest_os);

	if (!opts->system_wide)
		perf_event__synthesize_thread_map(tool, evsel_list->threads,
						  process_synthesized_event,
						  machine);
	else
		perf_event__synthesize_threads(tool, process_synthesized_event,
					       machine);

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			exit(-1);
		}
	}

	perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

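	/*
	 * Main capture loop: drain all mmap buffers, and if a pass produced
	 * no new samples, block in poll() until the kernel signals more
	 * data or a signal handler sets 'done'.  Once 'done' is set the
	 * counters are disabled, and the following iteration drains
	 * whatever is left before breaking out.
	 */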
	for (;;) {
		int hits = rec->samples;

		perf_record__mmap_read_all(rec);

		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		if (done)
			perf_evlist__disable(evsel_list);
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}

#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_END
};

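/*
 * Parse a comma separated branch filter list: e.g. "any_call,u" yields
 * PERF_SAMPLE_BRANCH_ANY_CALL | PERF_SAMPLE_BRANCH_USER.  If only
 * privilege level modifiers (u, k, hv) are given, the branch type
 * defaults to PERF_SAMPLE_BRANCH_ANY.
 */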
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * Cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}

static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
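
/*
 * Typical invocations (illustrative only; all options used here are
 * defined in record_options below):
 *
 *	perf record -F 1000 -g -- ./workload	# 1000 Hz, with call graphs
 *	perf record -a -e cycles sleep 5	# system wide for ~5 seconds
 *	perf record -p 1234 -o out.data		# attach to an existing pid
 */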

/*
 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
 * because we need to have access to it in perf_record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
	.opts = {
		.mmap_pages	= UINT_MAX,
		.user_freq	= UINT_MAX,
		.user_interval	= ULLONG_MAX,
		.freq		= 1000,
	},
	.write_mode = WRITE_FORCE,
	.file_new   = true,
};

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use perf_record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'ing 'perf record',
 * using pipes, etc.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target_pid, "pid",
		   "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target_tid, "tid",
		   "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
		    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.cpu_list, "cpu",
		   "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
		    "overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		   "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
		    "do call-graph (stack chain/backtrace) recording"),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
			   "branch any", "sample any taken branches",
			   parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_END()
};

int cmd_record(int argc, const char **argv, const char *prefix __used)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;

	perf_header__set_cmdline(argc, argv);

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && !rec->opts.target_pid && !rec->opts.target_tid &&
	    !rec->opts.system_wide && !rec->opts.cpu_list && !rec->uid_str)
		usage_with_options(record_usage, record_options);

	if (rec->force && rec->append_file) {
		fprintf(stderr, "Can't overwrite and append at the same time."
				" You need to choose between -f and -A.\n");
		usage_with_options(record_usage, record_options);
	} else if (rec->append_file) {
		rec->write_mode = WRITE_APPEND;
	} else {
		rec->write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !rec->opts.system_wide) {
		fprintf(stderr, "cgroup monitoring only available in"
				" system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	rec->opts.uid = parse_target_uid(rec->uid_str, rec->opts.target_tid,
					 rec->opts.target_pid);
	if (rec->uid_str != NULL && rec->opts.uid == UINT_MAX - 1)
		goto out_free_fd;

	if (rec->opts.target_pid)
		rec->opts.target_tid = rec->opts.target_pid;

	if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
				     rec->opts.target_tid, rec->opts.uid,
				     rec->opts.cpu_list) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_header__push_event(pos->attr.config, event_name(pos)))
			goto out_free_fd;
	}

	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}