// SPDX-License-Identifier: GPL-2.0
/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/config.h"

#include "util/callchain.h"
#include "util/cgroup.h"
#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/drv_configs.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
#include "util/perf-hooks.h"
#include "util/time-utils.h"
#include "util/units.h"
#include "asm/bug.h"

#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <asm/bug.h>
#include <linux/time64.h>

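/*
 * Parsed --switch-output settings: whether output switching is enabled,
 * and whether it is driven by SIGUSR2 ('signal'), a size threshold, or
 * a time threshold.
 */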
struct switch_output {
	bool		 enabled;
	bool		 signal;
	unsigned long	 size;
	unsigned long	 time;
	const char	*str;
	bool		 set;
};

struct record {
	struct perf_tool	tool;
	struct record_opts	opts;
	u64			bytes_written;
	struct perf_data_file	file;
	struct auxtrace_record	*itr;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_set;
	bool			no_buildid_cache;
	bool			no_buildid_cache_set;
	bool			buildid_all;
	bool			timestamp_filename;
	struct switch_output	switch_output;
	unsigned long long	samples;
};

static volatile int auxtrace_record__snapshot_started;
static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
static DEFINE_TRIGGER(switch_output_trigger);

static bool switch_output_signal(struct record *rec)
{
	return rec->switch_output.signal &&
	       trigger_is_ready(&switch_output_trigger);
}

static bool switch_output_size(struct record *rec)
{
	return rec->switch_output.size &&
	       trigger_is_ready(&switch_output_trigger) &&
	       (rec->bytes_written >= rec->switch_output.size);
}

static bool switch_output_time(struct record *rec)
{
	return rec->switch_output.time &&
	       trigger_is_ready(&switch_output_trigger);
}

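/*
 * Append 'size' bytes to the perf.data file, account them in
 * rec->bytes_written, and fire the switch-output trigger once the
 * configured size threshold is crossed.
 */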
static int record__write(struct record *rec, void *bf, size_t size)
{
	if (perf_data_file__write(rec->session->file, bf, size) < 0) {
		pr_err("failed to write perf data, error: %m\n");
		return -1;
	}

	rec->bytes_written += size;

	if (switch_output_size(rec))
		trigger_hit(&switch_output_trigger);

	return 0;
}

static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct record *rec = container_of(tool, struct record, tool);
	return record__write(rec, event, event->header.size);
}

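/*
 * Walk a backward (right-to-left) ring buffer starting at 'head' to
 * find the [start, end) byte range holding valid events, stopping at a
 * zero-sized header or after wrapping a full buffer length.
 */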
static int
backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
{
	struct perf_event_header *pheader;
	u64 evt_head = head;
	int size = mask + 1;

	pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
	pheader = (struct perf_event_header *)(buf + (head & mask));
	*start = head;
	while (true) {
		if (evt_head - head >= (unsigned int)size) {
			pr_debug("Finished reading backward ring buffer: rewind\n");
			if (evt_head - head > (unsigned int)size)
				evt_head -= pheader->size;
			*end = evt_head;
			return 0;
		}

		pheader = (struct perf_event_header *)(buf + (evt_head & mask));

		if (pheader->size == 0) {
			pr_debug("Finished reading backward ring buffer: get start\n");
			*end = evt_head;
			return 0;
		}

		evt_head += pheader->size;
		pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
	}
	WARN_ONCE(1, "Shouldn't get here\n");
	return -1;
}

static int
rb_find_range(void *data, int mask, u64 head, u64 old,
	      u64 *start, u64 *end, bool backward)
{
	if (!backward) {
		*start = old;
		*end = head;
		return 0;
	}

	return backward_rb_find_range(data, mask, head, start, end);
}

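/*
 * Copy the data between 'old' and 'head' out of one mmap'ed ring buffer
 * into the output file, splitting the write in two when the range wraps
 * around the end of the buffer.
 */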
static int
record__mmap_read(struct record *rec, struct perf_mmap *md,
		  bool overwrite, bool backward)
{
	u64 head = perf_mmap__read_head(md);
	u64 old = md->prev;
	u64 end = head, start = old;
	unsigned char *data = md->base + page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (rb_find_range(data, md->mask, head,
			  old, &start, &end, backward))
		return -1;

	if (start == end)
		return 0;

	rec->samples++;

	size = end - start;
	if (size > (unsigned long)(md->mask) + 1) {
		WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");

		md->prev = head;
		perf_mmap__consume(md, overwrite || backward);
		return 0;
	}

	if ((start & md->mask) + size != (end & md->mask)) {
		buf = &data[start & md->mask];
		size = md->mask + 1 - (start & md->mask);
		start += size;

		if (record__write(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[start & md->mask];
	size = end - start;
	start += size;

	if (record__write(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = head;
	perf_mmap__consume(md, overwrite || backward);
out:
	return rc;
}

static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;
	else
		signr = sig;

	done = 1;
}

static void sigsegv_handler(int sig)
{
	perf_hooks__recover();
	sighandler_dump_stack(sig);
}

static void record__sig_exit(void)
{
	if (signr == -1)
		return;

	signal(signr, SIG_DFL);
	raise(signr);
}

#ifdef HAVE_AUXTRACE_SUPPORT

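/*
 * Write one AUX area trace chunk (header event plus up to two data
 * fragments) to the output file, padding the payload to an 8-byte
 * boundary and indexing its file offset when not writing to a pipe.
 */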
static int record__process_auxtrace(struct perf_tool *tool,
				    union perf_event *event, void *data1,
				    size_t len1, void *data2, size_t len2)
{
	struct record *rec = container_of(tool, struct record, tool);
	struct perf_data_file *file = &rec->file;
	size_t padding;
	u8 pad[8] = {0};

	if (!perf_data_file__is_pipe(file)) {
		off_t file_offset;
		int fd = perf_data_file__fd(file);
		int err;

		file_offset = lseek(fd, 0, SEEK_CUR);
		if (file_offset == -1)
			return -1;
		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
						     event, file_offset);
		if (err)
			return err;
	}

	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
	padding = (len1 + len2) & 7;
	if (padding)
		padding = 8 - padding;

	record__write(rec, event, event->header.size);
	record__write(rec, data1, len1);
	if (len2)
		record__write(rec, data2, len2);
	record__write(rec, &pad, padding);

	return 0;
}

static int record__auxtrace_mmap_read(struct record *rec,
				      struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
				  record__process_auxtrace);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_mmap_read_snapshot(struct record *rec,
					       struct auxtrace_mmap *mm)
{
	int ret;

	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
					   record__process_auxtrace,
					   rec->opts.auxtrace_snapshot_size);
	if (ret < 0)
		return ret;

	if (ret)
		rec->samples++;

	return 0;
}

static int record__auxtrace_read_snapshot_all(struct record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm =
				&rec->evlist->mmap[i].auxtrace_mmap;

		if (!mm->base)
			continue;

		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}
out:
	return rc;
}

static void record__read_auxtrace_snapshot(struct record *rec)
{
	pr_debug("Recording AUX area tracing snapshot\n");
	if (record__auxtrace_read_snapshot_all(rec) < 0) {
		trigger_error(&auxtrace_snapshot_trigger);
	} else {
		if (auxtrace_record__snapshot_finish(rec->itr))
			trigger_error(&auxtrace_snapshot_trigger);
		else
			trigger_ready(&auxtrace_snapshot_trigger);
	}
}

#else

static inline
int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
			       struct auxtrace_mmap *mm __maybe_unused)
{
	return 0;
}

static inline
void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
{
}

static inline
int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
{
	return 0;
}

#endif

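/*
 * mmap the ring buffers for every event in the evlist, translating an
 * EPERM failure into actionable advice about perf_event_mlock_kb and
 * -m/--mmap_pages.
 */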
static int record__mmap_evlist(struct record *rec,
			       struct perf_evlist *evlist)
{
	struct record_opts *opts = &rec->opts;
	char msg[512];

	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
				 opts->auxtrace_mmap_pages,
				 opts->auxtrace_snapshot_mode) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %u,%u)\n",
			       opts->mmap_pages, opts->auxtrace_mmap_pages);
			return -errno;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno,
				str_error_r(errno, msg, sizeof(msg)));
			if (errno)
				return -errno;
			else
				return -EINVAL;
		}
	}
	return 0;
}

static int record__mmap(struct record *rec)
{
	return record__mmap_evlist(rec, rec->evlist);
}

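/*
 * Configure and open all counters in the evlist, retrying with
 * perf_evsel__fallback() on failure, then apply event filters and
 * driver configs and mmap the ring buffers.
 */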
static int record__open(struct record *rec)
{
	char msg[BUFSIZ];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct record_opts *opts = &rec->opts;
	struct perf_evsel_config_term *err_term;
	int rc = 0;

	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each_entry(evlist, pos) {
try_again:
		if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose > 0)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist, &pos)) {
		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
			pos->filter, perf_evsel__name(pos), errno,
			str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
		pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
		      str_error_r(errno, msg, sizeof(msg)));
		rc = -1;
		goto out;
	}

	rc = record__mmap(rec);
	if (rc)
		goto out;

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct record *rec = container_of(tool, struct record, tool);

	rec->samples++;

	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
}

static int process_buildids(struct record *rec)
{
	struct perf_data_file *file  = &rec->file;
	struct perf_session *session = rec->session;

	if (file->size == 0)
		return 0;

	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with the real pathname it found.  In this case
	 * we prefer the vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory):
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
	symbol_conf.ignore_vmlinux_buildid = true;

	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples.
	 */
	if (rec->buildid_all)
		rec->tool.sample = NULL;

	return perf_session__process_events(session);
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for the guest kernel when processing the record & report
	 * subcommands, we arrange the module mmaps prior to the guest
	 * kernel mmap and trigger a preload of the dso, because by
	 * default guest module symbols are loaded from guest kallsyms
	 * instead of /lib/modules/XXX/XXX. This avoids missing symbols
	 * when the first address is in a module instead of in the
	 * guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

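/*
 * Drain every ring buffer in the evlist (forward or backward maps) plus
 * any AUX area maps, then emit a PERF_RECORD_FINISHED_ROUND event if
 * anything was written.
 */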
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
				    bool backward)
{
	u64 bytes_written = rec->bytes_written;
	int i;
	int rc = 0;
	struct perf_mmap *maps;

	if (!evlist)
		return 0;

	maps = backward ? evlist->backward_mmap : evlist->mmap;
	if (!maps)
		return 0;

	if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

		if (maps[i].base) {
			if (record__mmap_read(rec, &maps[i],
					      evlist->overwrite, backward) != 0) {
				rc = -1;
				goto out;
			}
		}

		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
		    record__auxtrace_mmap_read(rec, mm) != 0) {
			rc = -1;
			goto out;
		}
	}

	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 */
	if (bytes_written != rec->bytes_written)
		rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));

	if (backward)
		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
out:
	return rc;
}

static int record__mmap_read_all(struct record *rec)
{
	int err;

	err = record__mmap_read_evlist(rec, rec->evlist, false);
	if (err)
		return err;

	return record__mmap_read_evlist(rec, rec->evlist, true);
}

static void record__init_features(struct record *rec)
{
	struct perf_session *session = rec->session;
	int feat;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&rec->evlist->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->opts.full_auxtrace)
		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);

	perf_header__clear_feat(&session->header, HEADER_STAT);
}

static void
record__finish_output(struct record *rec)
{
	struct perf_data_file *file = &rec->file;
	int fd = perf_data_file__fd(file);

	if (file->is_pipe)
		return;

	rec->session->header.data_size += rec->bytes_written;
	file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);

	if (!rec->no_buildid) {
		process_buildids(rec);

		if (rec->buildid_all)
			dsos__hit_all(rec->session);
	}
	perf_session__write_header(rec->session, rec->evlist, fd, true);
}

static int record__synthesize_workload(struct record *rec, bool tail)
{
	int err;
	struct thread_map *thread_map;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
	if (thread_map == NULL)
		return -1;

	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
						 process_synthesized_event,
						 &rec->session->machines.host,
						 rec->opts.sample_address,
						 rec->opts.proc_map_timeout);
	thread_map__put(thread_map);
	return err;
}

static int record__synthesize(struct record *rec, bool tail);

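/*
 * Finish the current perf.data file and switch to a new,
 * timestamp-suffixed one. On a mid-session switch (!at_exit) the byte
 * counters are reset and tracking events are re-synthesized into the
 * new file.
 */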
static int
record__switch_output(struct record *rec, bool at_exit)
{
	struct perf_data_file *file = &rec->file;
	int fd, err;

	/* Same size: "2015122520103046" */
	char timestamp[] = "InvalidTimestamp";

	record__synthesize(rec, true);
	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

	rec->samples = 0;
	record__finish_output(rec);
	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
	if (err) {
		pr_err("Failed to get current timestamp\n");
		return -EINVAL;
	}

	fd = perf_data_file__switch(file, timestamp,
				    rec->session->header.data_offset,
				    at_exit);
	if (fd >= 0 && !at_exit) {
		rec->bytes_written = 0;
		rec->session->header.data_size = 0;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
			file->path, timestamp);

	/* Output tracking events */
	if (!at_exit) {
		record__synthesize(rec, false);

		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist, so the newly created perf.data would not
		 * contain map and comm information.
		 * Create a fake thread_map and directly call
		 * perf_event__synthesize_thread_map() for those events.
		 */
		if (target__none(&rec->opts.target))
			record__synthesize_workload(rec, false);
	}
	return fd;
}

static volatile int workload_exec_errno;

/*
 * perf_evlist__prepare_workload will send a SIGUSR1
 * if the fork fails, since we asked for it by setting its
 * want_signal to true.
 */
static void workload_exec_failed_signal(int signo __maybe_unused,
					siginfo_t *info,
					void *ucontext __maybe_unused)
{
	workload_exec_errno = info->si_value.sival_int;
	done = 1;
	child_finished = 1;
}

static void snapshot_sig_handler(int sig);
static void alarm_sig_handler(int sig);

int __weak
perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
			    struct perf_tool *tool __maybe_unused,
			    perf_event__handler_t process __maybe_unused,
			    struct machine *machine __maybe_unused)
{
	return 0;
}

static const struct perf_event_mmap_page *
perf_evlist__pick_pc(struct perf_evlist *evlist)
{
	if (evlist) {
		if (evlist->mmap && evlist->mmap[0].base)
			return evlist->mmap[0].base;
		if (evlist->backward_mmap && evlist->backward_mmap[0].base)
			return evlist->backward_mmap[0].base;
	}
	return NULL;
}

static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
{
	const struct perf_event_mmap_page *pc;

	pc = perf_evlist__pick_pc(rec->evlist);
	if (pc)
		return pc;
	return NULL;
}

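/*
 * Synthesize the non-sample events the report side depends on: attrs,
 * features and tracing data for pipe output, then the time-conversion,
 * auxtrace info, kernel mmap, module and thread events.
 */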
static int record__synthesize(struct record *rec, bool tail)
{
	struct perf_session *session = rec->session;
	struct machine *machine = &session->machines.host;
	struct perf_data_file *file = &rec->file;
	struct record_opts *opts = &rec->opts;
	struct perf_tool *tool = &rec->tool;
	int fd = perf_data_file__fd(file);
	int err = 0;

	if (rec->opts.tail_synthesize != tail)
		return 0;

	if (file->is_pipe) {
		/*
		 * We need to synthesize events first, because some
		 * features work on top of them (on the report side).
		 */
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out;
		}

		err = perf_event__synthesize_features(tool, session, rec->evlist,
						      process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize features.\n");
			return err;
		}

		if (have_tracepoints(&rec->evlist->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints, so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool,	fd, rec->evlist,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out;
			}
			rec->bytes_written += err;
		}
	}

	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
					  process_synthesized_event, machine);
	if (err)
		goto out;

	if (rec->opts.full_auxtrace) {
		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
					session, process_synthesized_event);
		if (err)
			goto out;
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine);
	WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
			   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
			   "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
					    process_synthesized_event, opts->sample_address,
					    opts->proc_map_timeout);
out:
	return err;
}

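/*
 * The main record loop: set up signal handlers and the session, open
 * the counters, synthesize the initial events, then drain the ring
 * buffers until the workload exits or recording is interrupted, and
 * finally write out the header and build-ids.
 */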
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
	int err;
	int status = 0;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct record_opts *opts = &rec->opts;
	struct perf_data_file *file = &rec->file;
	struct perf_session *session;
	bool disabled = false, draining = false;
	int fd;

	rec->progname = argv[0];

	atexit(record__sig_exit);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);
	signal(SIGSEGV, sigsegv_handler);

	if (rec->opts.record_namespaces)
		tool->namespace_events = true;

	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
		signal(SIGUSR2, snapshot_sig_handler);
		if (rec->opts.auxtrace_snapshot_mode)
			trigger_on(&auxtrace_snapshot_trigger);
		if (rec->switch_output.enabled)
			trigger_on(&switch_output_trigger);
	} else {
		signal(SIGUSR2, SIG_IGN);
	}

	session = perf_session__new(file, false, tool);
	if (session == NULL) {
		pr_err("Perf session creation failed.\n");
		return -1;
	}

	fd = perf_data_file__fd(file);
	rec->session = session;

	record__init_features(rec);

	if (forks) {
		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
						    argv, file->is_pipe,
						    workload_exec_failed_signal);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			status = err;
			goto out_delete_session;
		}
	}

	if (record__open(rec) != 0) {
		err = -1;
		goto out_child;
	}

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_child;
	}

	/*
	 * Normally perf_session__new would do this, but it doesn't have the
	 * evlist.
	 */
	if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
		rec->tool.ordered_events = false;
	}

	if (!rec->evlist->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	if (file->is_pipe) {
		err = perf_header__write_pipe(fd);
		if (err < 0)
			goto out_child;
	} else {
		err = perf_session__write_header(session, rec->evlist, fd, false);
		if (err < 0)
			goto out_child;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_child;
	}

	machine = &session->machines.host;

	err = record__synthesize(rec, false);
	if (err < 0)
		goto out_child;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_child;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!target__none(&opts->target) && !opts->initial_delay)
		perf_evlist__enable(rec->evlist);

	/*
	 * Let the child rip
	 */
	if (forks) {
		union perf_event *event;
		pid_t tgid;

		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Some H/W events are generated before the COMM event,
		 * which is emitted during exec(), so perf script cannot
		 * see a correct process name for those events.
		 * Synthesize a COMM event to prevent this.
		 */
		tgid = perf_event__synthesize_comm(tool, event,
						   rec->evlist->workload.pid,
						   process_synthesized_event,
						   machine);
		free(event);

		if (tgid == -1)
			goto out_child;

		event = malloc(sizeof(event->namespaces) +
			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
			       machine->id_hdr_size);
		if (event == NULL) {
			err = -ENOMEM;
			goto out_child;
		}

		/*
		 * Synthesize a NAMESPACES event for the command specified.
		 */
		perf_event__synthesize_namespaces(tool, event,
						  rec->evlist->workload.pid,
						  tgid, process_synthesized_event,
						  machine);
		free(event);

		perf_evlist__start_workload(rec->evlist);
	}

	if (opts->initial_delay) {
		usleep(opts->initial_delay * USEC_PER_MSEC);
		perf_evlist__enable(rec->evlist);
	}

	trigger_ready(&auxtrace_snapshot_trigger);
	trigger_ready(&switch_output_trigger);
	perf_hooks__invoke_record_start();
	for (;;) {
		unsigned long long hits = rec->samples;

		/*
		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY here:
		 * when done == true and hits != rec->samples in the
		 * previous round.
		 *
		 * perf_evlist__toggle_bkw_mmap ensures we never
		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
		 */
		if (trigger_is_hit(&switch_output_trigger) || done || draining)
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);

		if (record__mmap_read_all(rec) < 0) {
			trigger_error(&auxtrace_snapshot_trigger);
			trigger_error(&switch_output_trigger);
			err = -1;
			goto out_child;
		}

		if (auxtrace_record__snapshot_started) {
			auxtrace_record__snapshot_started = 0;
			if (!trigger_is_error(&auxtrace_snapshot_trigger))
				record__read_auxtrace_snapshot(rec);
			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
				pr_err("AUX area tracing snapshot failed\n");
				err = -1;
				goto out_child;
			}
		}

		if (trigger_is_hit(&switch_output_trigger)) {
			/*
			 * If switch_output_trigger is hit, the data in
			 * the overwritable ring buffer should have been
			 * collected, so bkw_mmap_state should be set to
			 * BKW_MMAP_EMPTY.
			 *
			 * If SIGUSR2 is raised after or during
			 * record__mmap_read_all(), it didn't collect data
			 * from the overwritable ring buffer. Read again.
			 */
			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
				continue;
			trigger_ready(&switch_output_trigger);

			/*
			 * Re-enable events in the overwrite ring buffer
			 * after record__mmap_read_all(): we should have
			 * collected data from it.
			 */
			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);

			if (!quiet)
				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
					waking);
			waking = 0;
			fd = record__switch_output(rec, false);
			if (fd < 0) {
				pr_err("Failed to switch to new file\n");
				trigger_error(&switch_output_trigger);
				err = fd;
				goto out_child;
			}

			/* re-arm the alarm */
			if (rec->switch_output.time)
				alarm(rec->switch_output.time);
		}

		if (hits == rec->samples) {
			if (done || draining)
				break;
			err = perf_evlist__poll(rec->evlist, -1);
			/*
			 * Propagate error, only if there's any. Ignore positive
			 * number of returned events and interrupt error.
			 */
			if (err > 0 || (err < 0 && errno == EINTR))
				err = 0;
			waking++;

			if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
				draining = true;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !target__none(&opts->target)) {
			trigger_off(&auxtrace_snapshot_trigger);
			perf_evlist__disable(rec->evlist);
			disabled = true;
		}
	}
	trigger_off(&auxtrace_snapshot_trigger);
	trigger_off(&switch_output_trigger);

	if (forks && workload_exec_errno) {
		char msg[STRERR_BUFSIZE];
		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
		pr_err("Workload failed: %s\n", emsg);
		err = -1;
		goto out_child;
	}

	if (!quiet)
		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	if (target__none(&rec->opts.target))
		record__synthesize_workload(rec, true);

out_child:
	if (forks) {
		int exit_status;

		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&exit_status);

		if (err < 0)
			status = err;
		else if (WIFEXITED(exit_status))
			status = WEXITSTATUS(exit_status);
		else if (WIFSIGNALED(exit_status))
			signr = WTERMSIG(exit_status);
	} else
		status = err;

	record__synthesize(rec, true);
	/* this will be recalculated during process_buildids() */
	rec->samples = 0;

	if (!err) {
		if (!rec->timestamp_filename) {
			record__finish_output(rec);
		} else {
			fd = record__switch_output(rec, true);
			if (fd < 0) {
				status = fd;
				goto out_delete_session;
			}
		}
	}

	perf_hooks__invoke_record_end();

	if (!err && !quiet) {
		char samples[128];
		const char *postfix = rec->timestamp_filename ?
					".<timestamp>" : "";

		if (rec->samples && !rec->opts.full_auxtrace)
			scnprintf(samples, sizeof(samples),
				  " (%" PRIu64 " samples)", rec->samples);
		else
			samples[0] = '\0';

		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
			perf_data_file__size(file) / 1024.0 / 1024.0,
			file->path, postfix, samples);
	}

out_delete_session:
	perf_session__delete(session);
	return status;
}

static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain->dump_size);
}

int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct callchain_param *callchain = opt->value;

	callchain->enabled = true;

	if (callchain->record_mode == CALLCHAIN_NONE)
		callchain->record_mode = CALLCHAIN_FP;

	callchain_debug(callchain);
	return 0;
}

static int perf_record_config(const char *var, const char *value, void *cb)
{
	struct record *rec = cb;

	if (!strcmp(var, "record.build-id")) {
		if (!strcmp(value, "cache"))
			rec->no_buildid_cache = false;
		else if (!strcmp(value, "no-cache"))
			rec->no_buildid_cache = true;
		else if (!strcmp(value, "skip"))
			rec->no_buildid = true;
		else
			return -1;
		return 0;
	}
	if (!strcmp(var, "record.call-graph"))
		var = "call-graph.record-mode"; /* fall-through */

	return perf_default_config(var, value, cb);
}

struct clockid_map {
	const char *name;
	int clockid;
};

#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime", CLOCK_REALTIME),
	CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
	CLOCKID_MAP("tai", CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono", CLOCK_MONOTONIC),
	CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real", CLOCK_REALTIME),
	CLOCKID_MAP("boot", CLOCK_BOOTTIME),

	CLOCKID_END,
};

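/*
 * Parse the -k/--clockid option: accept a raw clockid number or a
 * (case-insensitive, optionally CLOCK_-prefixed) name from the
 * clockids[] table above.
 */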
static int parse_clockid(const struct option *opt, const char *str, int unset)
{
	struct record_opts *opts = (struct record_opts *)opt->value;
	const struct clockid_map *cm;
	const char *ostr = str;

	if (unset) {
		opts->use_clockid = 0;
		return 0;
	}

	/* no arg passed */
	if (!str)
		return 0;

	/* no setting it twice */
	if (opts->use_clockid)
		return -1;

	opts->use_clockid = true;

	/* if it's a number, we're done */
	if (sscanf(str, "%d", &opts->clockid) == 1)
		return 0;

	/* allow a "CLOCK_" prefix to the name */
	if (!strncasecmp(str, "CLOCK_", 6))
		str += 6;

	for (cm = clockids; cm->name; cm++) {
		if (!strcasecmp(str, cm->name)) {
			opts->clockid = cm->clockid;
			return 0;
		}
	}

	opts->use_clockid = false;
	ui__warning("unknown clockid %s, check man page\n", ostr);
	return -1;
}

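/*
 * Parse -m/--mmap-pages, which takes "pages" or "pages,pages": the
 * first value sizes the data mmaps, the optional second value sizes
 * the AUX area tracing mmaps.
 */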
static int record__parse_mmap_pages(const struct option *opt,
				    const char *str,
				    int unset __maybe_unused)
{
	struct record_opts *opts = opt->value;
	char *s, *p;
	unsigned int mmap_pages;
	int ret;

	if (!str)
		return -EINVAL;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	p = strchr(s, ',');
	if (p)
		*p = '\0';

	if (*s) {
		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
		if (ret)
			goto out_free;
		opts->mmap_pages = mmap_pages;
	}

	if (!p) {
		ret = 0;
		goto out_free;
	}

	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
	if (ret)
		goto out_free;

	opts->auxtrace_mmap_pages = mmap_pages;

out_free:
	free(s);
	return ret;
}

static void switch_output_size_warn(struct record *rec)
{
	u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
	struct switch_output *s = &rec->switch_output;

	wakeup_size /= 2;

	if (s->size < wakeup_size) {
		char buf[100];

		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
		pr_warning("WARNING: switch-output data size lower than "
			   "wakeup kernel buffer size (%s); "
			   "expect bigger perf.data sizes\n", buf);
	}
}

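/*
 * Interpret the --switch-output argument as "signal", a size with a
 * B/K/M/G suffix, or a time with an s/m/h/d suffix, and enable
 * timestamped output filenames accordingly.
 */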
static int switch_output_setup(struct record *rec)
{
	struct switch_output *s = &rec->switch_output;
	static struct parse_tag tags_size[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};
	static struct parse_tag tags_time[] = {
		{ .tag  = 's', .mult = 1        },
		{ .tag  = 'm', .mult = 60       },
		{ .tag  = 'h', .mult = 60*60    },
		{ .tag  = 'd', .mult = 60*60*24 },
		{ .tag  = 0 },
	};
	unsigned long val;

	if (!s->set)
		return 0;

	if (!strcmp(s->str, "signal")) {
		s->signal = true;
		pr_debug("switch-output with SIGUSR2 signal\n");
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_size);
	if (val != (unsigned long) -1) {
		s->size = val;
		pr_debug("switch-output with %s size threshold\n", s->str);
		goto enabled;
	}

	val = parse_tag_value(s->str, tags_time);
	if (val != (unsigned long) -1) {
		s->time = val;
		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
			 s->str, s->time);
		goto enabled;
	}

	return -1;

enabled:
	rec->timestamp_filename = true;
	s->enabled              = true;

	if (s->size && !rec->opts.no_buffering)
		switch_output_size_warn(rec);

	return 0;
}

static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
const char * const *record_usage = __record_usage;

/*
 * XXX Ideally this would be local to cmd_record() and passed to a record__new,
 * because we need to have access to it in record__exit, which is called
 * after cmd_record() exits. But since record_options needs to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct record record = {
	.opts = {
		.sample_time	     = true,
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
			.default_per_cpu = true,
		},
		.proc_map_timeout     = 500,
	},
	.tool = {
		.sample		= process_sample_event,
		.fork		= perf_event__process_fork,
		.exit		= perf_event__process_exit,
		.comm		= perf_event__process_comm,
		.namespaces	= perf_event__process_namespaces,
		.mmap		= perf_event__process_mmap,
		.mmap2		= perf_event__process_mmap2,
		.ordered_events	= true,
	},
};

const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
	"\n\t\t\t\tDefault: fp";

static bool dry_run;

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'ing 'perf record',
 * using pipes, etc.
 */
static struct option __record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
			   NULL, "don't record events from perf itself",
			   exclude_perf),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.file.path, "file",
		    "output file name"),
	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
			&record.opts.no_inherit_set,
			"child tasks do not inherit counters"),
	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
		    "synthesize non-sample events at the end of output"),
	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
		     "number of mmap data pages and AUX area tracing mmap pages",
		     record__parse_mmap_pages),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
			   NULL, "enables call-graph recording" ,
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
		    "Record the sample physical addresses"),
	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
			&record.opts.sample_time_set,
			"Record the sample timestamps"),
	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
			"Record the sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
			&record.no_buildid_cache_set,
			"do not update the buildid cache"),
	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
			&record.no_buildid_set,
			"do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
		  "ms to wait before starting measurement after program start"),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
		    "sample transaction flags (special events only)"),
	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
		    "use per-thread mmaps"),
	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
		    "sample selected machine registers on interrupt,"
		    " use -I ? to list register names", parse_regs),
	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
		    "Record running/enabled time of read (:S) events"),
	OPT_CALLBACK('k', "clockid", &record.opts,
	"clockid", "clockid to use for events, see clock_gettime()",
	parse_clockid),
	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
			  "opts", "AUX area tracing Snapshot Mode", ""),
	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
		    "Record namespaces events"),
	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
		    "Record context switch events"),
	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
			 "Configure all used events to run in kernel space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
			 "Configure all used events to run in user space.",
			 PARSE_OPT_EXCLUSIVE),
	OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
		   "clang binary to use for compiling BPF scriptlets"),
	OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
		   "options passed to clang when compiling BPF scriptlets"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
		    "Record build-id of all DSOs regardless of hits"),
	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
		    "append timestamp to output filename"),
	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
			  &record.switch_output.set, "signal,size,time",
			  "Switch output when receiving SIGUSR2 or when crossing the size/time threshold",
			  "signal"),
	OPT_BOOLEAN(0, "dry-run", &dry_run,
		    "Parse options then exit"),
	OPT_END()
};

struct option *record_options = __record_options;

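/*
 * Entry point for 'perf record': parse options and perf config, set up
 * symbols, auxtrace, BPF and build-id handling, validate the target,
 * then hand over to __cmd_record().
 */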
int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

#ifndef HAVE_LIBBPF_SUPPORT
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
	set_nobuild('\0', "clang-path", true);
	set_nobuild('\0', "clang-opt", true);
# undef set_nobuild
#endif

#ifndef HAVE_BPF_PROLOGUE
# if !defined (HAVE_DWARF_SUPPORT)
#  define REASON  "NO_DWARF=1"
# elif !defined (HAVE_LIBBPF_SUPPORT)
#  define REASON  "NO_LIBBPF=1"
# else
#  define REASON  "this architecture doesn't support BPF prologue"
# endif
# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
	set_nobuild('\0', "vmlinux", true);
# undef set_nobuild
# undef REASON
#endif

	rec->evlist = perf_evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");

	}
	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		return -EINVAL;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		return -EINVAL;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (!rec->itr) {
		rec->itr = auxtrace_record__init(rec->evlist, &err);
		if (err)
			goto out;
	}

	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
					      rec->opts.auxtrace_snapshot_opts);
	if (err)
		goto out;

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = auxtrace_parse_filters(rec->evlist);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			 errbuf);
		goto out;
	}

	err = -ENOMEM;

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are required
		 * explicitly using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

	if (rec->evlist->nr_entries == 0 &&
	    __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out;
	}

	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	err = -ENOMEM;
	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains AUX area
	 * tracing data, because we do not decode the trace (that
	 * would take too long).
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = __cmd_record(&record, argc, argv);
out:
	perf_evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
	return err;
}

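/*
 * SIGUSR2 handler: kick off an AUX area tracing snapshot and/or an
 * output file switch, depending on which features are armed.
 */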
static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
		trigger_hit(&auxtrace_snapshot_trigger);
		auxtrace_record__snapshot_started = 1;
		if (auxtrace_record__snapshot_start(record.itr))
			trigger_error(&auxtrace_snapshot_trigger);
	}

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}