/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "asm/bug.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>
#include <asm/bug.h>
#include <linux/time64.h>
struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data_file	file;
	struct auxtrace_record	*itr;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	bool			switch_output;
	unsigned long long	samples;
};

static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;
	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}

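/*
 * Walk record headers forward from 'head' in a backward (overwrite) ring
 * buffer to locate the readable range: stop at a zero-sized header (the end
 * of valid data), or once a whole buffer size has been covered, in which
 * case back off over the record that overshot the wrap point.
 */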
static int
backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
	struct perf_event_header *pheader;
	u64 evt_head = head;
	int size = mask + 1;

	pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
	pheader = (struct perf_event_header *)(buf + (head & mask));
	*start = head;
	while (true) {
		if (evt_head - head >= (unsigned int)size) {
			pr_debug("Finished reading backward ring buffer: rewind\n");
			if (evt_head - head > (unsigned int)size)
				evt_head -= pheader->size;
			*end = evt_head;
			return 0;
		}

		pheader = (struct perf_event_header *)(buf + (evt_head & mask));

		if (pheader->size == 0) {
			pr_debug("Finished reading backward ring buffer: get start\n");
			*end = evt_head;
			return 0;
		}

		evt_head += pheader->size;
		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
	}
	WARN_ONCE(1, "Shouldn't get here\n");
	return -1;
}

static int
rb_find_range(void *data, int mask, u64 head, u64 old,
	      u64 *start, u64 *end, bool backward)
{
	if (!backward) {
		*start = old;
		*end = head;
		return 0;
	}

	return backward_rb_find_range(data, mask, head, start, end);
}

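/*
 * Copy the [start, end) range out of one mmap'ed ring buffer into the output
 * file. The range may wrap around the end of the buffer, in which case it is
 * written as two chunks.
 */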
static int
record__mmap_read(struct record *rec, struct perf_mmap *md,
		  bool overwrite, bool backward)
{
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
	u64 end = head, start = old;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (rb_find_range(data, md->mask, head,
			  old, &start, &end, backward))
		return -1;

	if (start == end)
		return 0;

	rec->samples++;

	size = end - start;
	if (size > (unsigned long)(md->mask) + 1) {
		WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

		md->prev = head;
		perf_mmap__consume(md, overwrite || backward);
		return 0;
	}

	if ((start & md->mask) + size != (end & md->mask)) {
		buf = &data[start & md->mask];
		size = md->mask + 1 - (start & md->mask);
		start += size;

		if (record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[start & md->mask];
	size = end - start;
	start += size;

	if (record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = head;
	perf_mmap__consume(md, overwrite || backward);
out:
	return rc;
}

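/* Set from signal handlers and polled by the main loop, hence volatile. */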
static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

static int record__process_auxtrace(struct perf_tool *tool,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data_file *file = &rec->file;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data_file__is_pipe(file)) {
		off_t file_offset;
		int fd = perf_data_file__fd(file);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

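	/*
	 * Write the event header, both halves of the AUX data (the second
	 * half is only present when the buffer wrapped) and the bytes that
	 * pad the payload out to an 8-byte boundary.
	 */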
	record__write(rec, event, event->header.size);
	record__write(rec, data1, len1);
	if (len2)
		record__write(rec, data2, len2);
	record__write(rec, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm =
				&rec->evlist->mmap[i].auxtrace_mmap;

		if (!mm->base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct auxtrace_mmap *mm __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

#endif

static int record__mmap_evlist(struct record *rec,
			       struct perf_evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	char msg[512];

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
			       str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

static int record__open(struct record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	struct perf_evsel_config_term *err_term;
	int rc = 0;

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		error("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
		error("failed to set config \"%s\" on event %s with %d (%s)\n",
		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
		      str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	rec->samples++;

	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_data_file *file = &rec->file;
	struct perf_session *session = rec->session;

	if (file->size == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with the real pathname it found. In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so no need to process samples.
	 */
	if (rec->buildid_all)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in a module instead of in the guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

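/*
 * Written after each pass over the mmaps that produced data, so that tools
 * processing the file know all events up to this point can be ordered and
 * flushed.
 */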
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
				    bool backward)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;

	if (!evlist)
		return 0;

	maps = backward ? evlist->backward_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

		if (maps[i].base) {
			if (record__mmap_read(rec, &maps[i],
					      evlist->overwrite, backward) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

	if (backward)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

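/* Flush the regular mmaps first, then any backward (overwrite) mmaps. */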
static int record__mmap_read_all(struct record *rec)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	struct perf_data_file *file = &rec->file;
	int fd = perf_data_file__fd(file);

	if (file->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);

	return;
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	struct {
		struct thread_map map;
		struct thread_map_data map_data;
	} thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map.map.nr = 1;
	thread_map.map.map[0].pid = rec->evlist->workload.pid;
	thread_map.map.map[0].comm = NULL;
	return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 rec->opts.sample_address,
						 rec->opts.proc_map_timeout);
}

static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data_file *file = &rec->file;
	int fd, err;

	/* Same size as "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data_file__switch(file, timestamp,
				    rec->session->header.data_offset,
				    at_exit);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			file->path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in evlist, so the newly created perf.data would lack
		 * map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked for that by setting
 * its want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);

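/*
 * Overridden by architectures that can convert the perf clock to/from a
 * hardware timestamp counter (e.g. x86 TSC); this weak stub makes the call
 * a no-op everywhere else.
 */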
int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->backward_mmap && evlist->backward_mmap[0].base)
			return evlist->backward_mmap[0].base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data_file *file = &rec->file;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data_file__fd(file);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (file->is_pipe) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so it's not really
			 * an error, just that we don't need to
			 * synthesize anything. We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout);
out:
	return err;
}

static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	rec->progname = argv[0];

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(file, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data_file__fd(file);
	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, file->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (file->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	machine = &session->machines.host;

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		union perf_event *event;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before the COMM event,
		 * which is emitted during exec(), so perf script
		 * cannot see a correct process name for those events.
		 * Synthesize a COMM event to prevent that.
		 */
		perf_event__synthesize_comm(tool, event,
					    rec->evlist->workload.pid,
					    process_synthesized_event,
					    machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		perf_evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
		 * here: when done == true and hits != rec->samples in
		 * the previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensures we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * the overwritable ring buffer should have been
			 * collected, so bkw_mmap_state should be set to
			 * BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 is raised after or during
			 * record__mmap_read_all(), it didn't collect
			 * data from the overwritable ring buffer.
			 * Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Reenable events in the overwrite ring buffer after
			 * record__mmap_read_all(): we should have collected
			 * data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate the error only if there is one; ignore a
			 * positive number of returned events and EINTR.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data_file__size(file) / 1024.0 / 1024.0,
			file->path, postfix, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}

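/*
 * Handle 'record.*' entries from the perfconfig file; anything else is
 * passed through to perf_default_config().
 */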
static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph"))
		var = "call-graph.record-mode"; /* fall-through */

	return perf_default_config(var, value, cb);
}

struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return 0;

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return 0;
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}

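/*
 * Parse the -m/--mmap-pages argument, "pages[,pages]": the first value sizes
 * the data mmaps, the optional second value sizes the AUX area tracing mmap.
 */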
static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	= true,
		.mmap_pages	= UINT_MAX,
		.user_freq	= UINT_MAX,
		.user_interval	= ULLONG_MAX,
		.freq		= 4000,
		.target		= {
			.uses_mmap	= true,
			.default_per_cpu = true,
		},
		.proc_map_timeout = 500,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		   "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		   "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		   "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.file.path, "file",
		   "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		     "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
			   "branch any", "sample any taken branches",
			   parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
			    "sample selected machine registers on interrupt,"
			    " use -I ? to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
		     "clockid", "clockid to use for events, see clock_gettime()",
		     parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
		     "per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_BOOLEAN(0, "switch-output", &record.switch_output,
		    "Switch output when receive SIGUSR2"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
	OPT_END()
};

struct option *record_options = __record_options;

int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON "NO_LIBBPF=1"
# else
#  define REASON "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	perf_config(perf_record_config, rec);

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");

	}
	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (rec->switch_output)
		rec->timestamp_filename = true;

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			goto out;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		goto out;

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = auxtrace_parse_filters(rec->evlist);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			 errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are required
		 * explicitly, using
		 *
		 * perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 *  if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *      (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->nr_entries == 0 &&
	    perf_evlist__add_default(rec->evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains AUX area tracing
	 * data, because we do not decode the trace: decoding it would
	 * take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = __cmd_record(&record, argc, argv);
out:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}

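/*
 * SIGUSR2 handler: hit whichever triggers are ready so that the main loop
 * takes an AUX area snapshot and/or switches the output file.
 */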
static void snapshot_sig_handler(int sig __maybe_unused)
{
	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (trigger_is_ready(&switch_output_trigger))
		trigger_hit(&switch_output_trigger);
}