// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <byteswap.h>
#include <endian.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <unistd.h>

#include "auxtrace.h"
#include "color.h"
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "machine.h"
#include "session.h"
#include "symbol.h"
#include "thread.h"
#include "thread-stack.h"
#include "tsc.h"
#include "tool.h"
#include "util/synthetic-events.h"

#include "arm-spe.h"
#include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)

struct arm_spe {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	struct itrace_synth_opts	synth_opts;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	u32				pmu_type;

	struct perf_tsc_conversion	tc;

	u8				timeless_decoding;
	u8				data_queued;

	u64				sample_type;
	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;

	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_miss_id;
	u64				remote_access_id;
	u64				memory_id;

	u64				kernel_start;

	unsigned long			num_events;
};

struct arm_spe_queue {
	struct arm_spe			*spe;
	unsigned int			queue_nr;
	struct auxtrace_buffer		*buffer;
	struct auxtrace_buffer		*old_buffer;
	union perf_event		*event_buf;
	bool				on_heap;
	bool				done;
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	u64				time;
	u64				timestamp;
	struct thread			*thread;
};

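/*
 * Dump an SPE trace buffer as hex bytes, one decoded packet per line.
 * Bytes that cannot be decoded are consumed one at a time and flagged
 * as bad packets, which lets the dump resynchronize on the next byte.
 */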
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
			 unsigned char *buf, size_t len)
{
	struct arm_spe_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[ARM_SPE_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... ARM SPE data: size %zu bytes\n",
		      len);

	while (len) {
		ret = arm_spe_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08zx: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = arm_spe_pkt_desc(&packet, desc,
					       ARM_SPE_PKT_DESC_MAX);
			if (!ret)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	printf(".\n");
	arm_spe_dump(spe, buf, len);
}

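/*
 * Decoder callback: hand the decoder the next auxtrace buffer for this
 * queue.  The previous buffer's data is dropped once the next one is in
 * use; empty buffers are skipped by recursing to fetch the one after.
 */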
static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
{
	struct arm_spe_queue *speq = data;
	struct auxtrace_buffer *buffer = speq->buffer;
	struct auxtrace_buffer *old_buffer = speq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &speq->spe->queues.queue_array[speq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	speq->buffer = buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(speq->spe->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	b->len = buffer->size;
	b->buf = buffer->data;

	if (b->len) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		speq->old_buffer = buffer;
	} else {
		auxtrace_buffer__drop_data(buffer);
		return arm_spe_get_trace(b, data);
	}

	return 0;
}

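/*
 * Allocate a per-queue decode context, including a scratch buffer for
 * synthesized events, and bind a new decoder instance to it through the
 * get_trace callback above.
 */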
static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
		unsigned int queue_nr)
{
	struct arm_spe_params params = { .get_trace = 0, };
	struct arm_spe_queue *speq;

	speq = zalloc(sizeof(*speq));
	if (!speq)
		return NULL;

	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!speq->event_buf)
		goto out_free;

	speq->spe = spe;
	speq->queue_nr = queue_nr;
	speq->pid = -1;
	speq->tid = -1;
	speq->cpu = -1;

	/* params set */
	params.get_trace = arm_spe_get_trace;
	params.data = speq;

	/* create new decoder */
	speq->decoder = arm_spe_decoder_new(&params);
	if (!speq->decoder)
		goto out_free;

	return speq;

out_free:
	zfree(&speq->event_buf);
	free(speq);

	return NULL;
}

static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
{
	return ip >= spe->kernel_start ?
		PERF_RECORD_MISC_KERNEL :
		PERF_RECORD_MISC_USER;
}

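/*
 * Fill in the sample fields common to every synthesized event:
 * timestamp (unless decoding a timeless trace), instruction pointer,
 * cpumode, pid/tid, period and CPU.
 */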
static void arm_spe_prep_sample(struct arm_spe *spe,
				struct arm_spe_queue *speq,
				union perf_event *event,
				struct perf_sample *sample)
{
	struct arm_spe_record *record = &speq->decoder->record;

	if (!spe->timeless_decoding)
		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);

	sample->ip = record->from_ip;
	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
	sample->pid = speq->pid;
	sample->tid = speq->tid;
	sample->period = 1;
	sample->cpu = speq->cpu;

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = sample->cpumode;
	event->sample.header.size = sizeof(struct perf_event_header);
}

static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}

static inline int
arm_spe_deliver_synth_event(struct arm_spe *spe,
			    struct arm_spe_queue *speq __maybe_unused,
			    union perf_event *event,
			    struct perf_sample *sample)
{
	int ret;

	if (spe->synth_opts.inject) {
		ret = arm_spe__inject_event(event, sample, spe->sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(spe->session, event, sample);
	if (ret)
		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);

	return ret;
}

static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
				     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->virt_addr;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
					u64 spe_events_id)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

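/*
 * Map the SPE record type bits onto a perf_mem_data_src encoding:
 * memory operation (load/store), cache level plus hit/miss, remote
 * access and TLB walk hit/miss.  Returns 0 when the record is not a
 * memory operation.
 */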
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
{
	union perf_mem_data_src	data_src = { 0 };

	if (record->op == ARM_SPE_LD)
		data_src.mem_op = PERF_MEM_OP_LOAD;
	else if (record->op == ARM_SPE_ST)
		data_src.mem_op = PERF_MEM_OP_STORE;
	else
		return 0;

	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
		data_src.mem_lvl = PERF_MEM_LVL_L3;

		if (record->type & ARM_SPE_LLC_MISS)
			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
		data_src.mem_lvl = PERF_MEM_LVL_L1;

		if (record->type & ARM_SPE_L1D_MISS)
			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
	}

	if (record->type & ARM_SPE_REMOTE_ACCESS)
		data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;

	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
		data_src.mem_dtlb = PERF_MEM_TLB_WK;

		if (record->type & ARM_SPE_TLB_MISS)
			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
		else
			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
	}

	return data_src.val;
}

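/*
 * Synthesize one sample per enabled event class (L1D, LLC, TLB, branch,
 * remote access, memory) that matches the type bits of the current
 * record.
 */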
static int arm_spe_sample(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;
	struct arm_spe *spe = speq->spe;
	u64 data_src;
	int err;

	data_src = arm_spe__synth_data_source(record);

	if (spe->sample_flc) {
		if (record->type & ARM_SPE_L1D_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_L1D_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
		err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
		if (err)
			return err;
	}

	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
						data_src);
		if (err)
			return err;
	}

	/*
	 * A data_src of zero means the record is not a memory operation,
	 * so skip synthesizing a memory sample in that case.
	 */
	if (spe->sample_memory && data_src) {
		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
		if (err)
			return err;
	}

	return 0;
}

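/*
 * Decode records for one queue until either the trace data is exhausted
 * (returns 1) or, for timeful decoding, the queue's timestamp passes
 * *timestamp (returns 0 with *timestamp updated).
 */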
static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		/*
		 * The usual logic is to decode the packets first and then
		 * synthesize a sample based on the record; but here the flow
		 * is reversed: arm_spe_sample() is called to synthesize
		 * samples prior to arm_spe_decode().
		 *
		 * There are two reasons for this:
		 * 1. When the queue is set up in arm_spe__setup_queue(), the
		 * trace data has already been decoded and a record generated,
		 * but synthesizing a sample for that record is deferred until
		 * execution reaches here, so the leftover record must be
		 * handled first.
		 * 2. After decoding trace data, the record timestamp needs to
		 * be compared with the timestamp of the coming perf event; if
		 * the record is later, bail out and push the record onto the
		 * auxtrace heap, so that synthesizing its sample is deferred
		 * until the next call here.  This correlates samples between
		 * Arm SPE trace data and other perf events with correct time
		 * ordering.
		 */
		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * If an error is detected while decoding SPE trace data,
		 * continue with the next chunk of trace data to find more
		 * records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the timestamp of the queue is later than the timestamp
		 * of the coming perf event, bail out so the perf event can be
		 * processed first.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}

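/*
 * Create the decode context for a queue on first use.  For timeful
 * decoding, decode up to the first valid record so the queue can be
 * placed on the auxtrace heap, ordered by its timestamp.
 */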
static int arm_spe__setup_queue(struct arm_spe *spe,
			       struct auxtrace_queue *queue,
			       unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	if (list_empty(&queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		if (spe->timeless_decoding)
			return 0;

retry:
		ret = arm_spe_decode(speq->decoder);

		if (!ret)
			return 0;

		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}

static int arm_spe__setup_queues(struct arm_spe *spe)
{
	unsigned int i;
	int ret;

	for (i = 0; i < spe->queues.nr_queues; i++) {
		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

static int arm_spe__update_queues(struct arm_spe *spe)
{
	if (spe->queues.new_data) {
		spe->queues.new_data = false;
		return arm_spe__setup_queues(spe);
	}

	return 0;
}

static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
{
	struct evsel *evsel;
	struct evlist *evlist = spe->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Loop through the list of events and disable timeless decoding
	 * if any of them has the time bit set.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}

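/*
 * Resolve the pid/tid/cpu for a queue, preferring the tid the machine
 * last saw running on this CPU and falling back to the tid recorded for
 * the queue.
 */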
static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
				    struct auxtrace_queue *queue)
{
	struct arm_spe_queue *speq = queue->priv;
	pid_t tid;

	tid = machine__get_current_tid(spe->machine, speq->cpu);
	if (tid != -1) {
		speq->tid = tid;
		thread__zput(speq->thread);
	} else
		speq->tid = queue->tid;

	if ((!speq->thread) && (speq->tid != -1)) {
		speq->thread = machine__find_thread(spe->machine, -1,
						    speq->tid);
	}

	if (speq->thread) {
		speq->pid = speq->thread->pid_;
		if (queue->cpu == -1)
			speq->cpu = speq->thread->cpu;
	}
}

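/*
 * Pop queues off the auxtrace heap in timestamp order and decode each
 * one up to the given timestamp, re-adding queues that still have data,
 * so samples interleave correctly with other perf events.
 */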
static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			speq->on_heap = false;
		}
	}

	return 0;
}

static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
					    u64 time_)
{
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
		struct arm_spe_queue *speq = queue->priv;

		if (speq && (tid == -1 || speq->tid == tid)) {
			speq->time = time_;
			arm_spe_set_pid_tid_cpu(spe, queue);
			arm_spe_run_decoder(speq, &ts);
		}
	}
	return 0;
}

static int arm_spe_process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct arm_spe *spe = container_of(session->auxtrace,
			struct arm_spe, auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("SPE trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && (sample->time != (u64) -1))
		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
	else
		timestamp = 0;

	if (timestamp || spe->timeless_decoding) {
		err = arm_spe__update_queues(spe);
		if (err)
			return err;
	}

	if (spe->timeless_decoding) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_timeless_queues(spe,
					event->fork.tid,
					sample->time);
		}
	} else if (timestamp) {
		err = arm_spe_process_queues(spe, timestamp);
	}

	return err;
}

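/*
 * Queue PERF_RECORD_AUXTRACE data for later decoding.  In pipe mode the
 * trace payload follows the event in the stream, so it is copied out
 * here and, when dumping, printed immediately.
 */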
static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);

	if (!spe->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&spe->queues, session, event,
				data_offset, &buffer);
		if (err)
			return err;

		/* Dump here, now that we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				arm_spe_dump_event(spe, buffer->data,
						buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

static int arm_spe_flush(struct perf_session *session __maybe_unused,
			 struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
			auxtrace);
	int ret;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = arm_spe__update_queues(spe);
	if (ret < 0)
		return ret;

	if (spe->timeless_decoding)
		return arm_spe_process_timeless_queues(spe, -1,
				MAX_TIMESTAMP - 1);

	return arm_spe_process_queues(spe, MAX_TIMESTAMP);
}

static void arm_spe_free_queue(void *priv)
{
	struct arm_spe_queue *speq = priv;

	if (!speq)
		return;
	thread__zput(speq->thread);
	arm_spe_decoder_free(speq->decoder);
	zfree(&speq->event_buf);
	free(speq);
}

static void arm_spe_free_events(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		arm_spe_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

static void arm_spe_free(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);

	auxtrace_heap__free(&spe->heap);
	arm_spe_free_events(session);
	session->auxtrace = NULL;
	free(spe);
}

static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
				      struct evsel *evsel)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);

	return evsel->core.attr.type == spe->pmu_type;
}

static const char * const arm_spe_info_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
};

static void arm_spe_print_info(__u64 *arr)
{
	if (!dump_trace)
		return;

	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
}

struct arm_spe_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

static int arm_spe_event_synth(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine __maybe_unused)
{
	struct arm_spe_synth *arm_spe_synth =
		      container_of(tool, struct arm_spe_synth, dummy_tool);

	return perf_session__deliver_synth_event(arm_spe_synth->session,
						 event, NULL);
}

static int arm_spe_synth_event(struct perf_session *session,
			       struct perf_event_attr *attr, u64 id)
{
	struct arm_spe_synth arm_spe_synth;

	memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
	arm_spe_synth.session = session;

	return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
					   &id, arm_spe_event_synth);
}

static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
				    const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.id && evsel->core.id[0] == id) {
			if (evsel->name)
				zfree(&evsel->name);
			evsel->name = strdup(name);
			break;
		}
	}
}

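/*
 * Set up the synthesized event attributes and allocate one event id per
 * enabled sample class.  The ids are derived from the SPE evsel's first
 * id plus a fixed offset, which makes collisions with existing ids
 * unlikely.
 */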
static int
arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == spe->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with SPE trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
			    PERF_SAMPLE_ADDR;
	if (spe->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	spe->sample_type = attr.sample_type;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->core.id[0] + 1000000000;

	if (!id)
		id = 1;

	if (spe->synth_opts.flc) {
		spe->sample_flc = true;

		/* Level 1 data cache miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_miss_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-miss");
		id += 1;

		/* Level 1 data cache access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_access_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-access");
		id += 1;
	}

	if (spe->synth_opts.llc) {
		spe->sample_llc = true;

		/* Last level cache miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_miss_id = id;
		arm_spe_set_event_name(evlist, id, "llc-miss");
		id += 1;

		/* Last level cache access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_access_id = id;
		arm_spe_set_event_name(evlist, id, "llc-access");
		id += 1;
	}

	if (spe->synth_opts.tlb) {
		spe->sample_tlb = true;

		/* TLB miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_miss_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-miss");
		id += 1;

		/* TLB access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_access_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-access");
		id += 1;
	}

	if (spe->synth_opts.branches) {
		spe->sample_branch = true;

		/* Branch miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->branch_miss_id = id;
		arm_spe_set_event_name(evlist, id, "branch-miss");
		id += 1;
	}

	if (spe->synth_opts.remote_access) {
		spe->sample_remote_access = true;

		/* Remote access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->remote_access_id = id;
		arm_spe_set_event_name(evlist, id, "remote-access");
		id += 1;
	}

	if (spe->synth_opts.mem) {
		spe->sample_memory = true;

		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->memory_id = id;
		arm_spe_set_event_name(evlist, id, "memory");
	}

	return 0;
}

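/*
 * Entry point for PERF_RECORD_AUXTRACE_INFO: validate the info event,
 * allocate and initialize the struct arm_spe, wire up the auxtrace
 * callbacks, and synthesize the events requested by the itrace options.
 */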
int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
	struct perf_record_time_conv *tc = &session->time_conv;
	struct arm_spe *spe;
	int err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					min_sz)
		return -EINVAL;

	spe = zalloc(sizeof(struct arm_spe));
	if (!spe)
		return -ENOMEM;

	err = auxtrace_queues__init(&spe->queues);
	if (err)
		goto err_free;

	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];

	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);

	/*
	 * The synthesized event PERF_RECORD_TIME_CONV has already been
	 * handled, and the parameters for the hardware clock are stored in
	 * the session context.  Pass these parameters to the struct
	 * perf_tsc_conversion in "spe->tc", which is used for later
	 * conversion between clock counter and timestamp.
	 *
	 * For backward compatibility, copy the fields starting from
	 * "time_cycles" only if they are contained in the event.
	 */
	spe->tc.time_shift = tc->time_shift;
	spe->tc.time_mult = tc->time_mult;
	spe->tc.time_zero = tc->time_zero;

	if (event_contains(*tc, time_cycles)) {
		spe->tc.time_cycles = tc->time_cycles;
		spe->tc.time_mask = tc->time_mask;
		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
		spe->tc.cap_user_time_short = tc->cap_user_time_short;
	}

	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(&auxtrace_info->priv[0]);

	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
		spe->synth_opts = *session->itrace_synth_opts;
	else
		itrace_synth_opts__set_default(&spe->synth_opts, false);

	err = arm_spe_synth_events(spe, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&spe->queues, session);
	if (err)
		goto err_free_queues;

	if (spe->queues.populated)
		spe->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&spe->queues);
	session->auxtrace = NULL;
err_free:
	free(spe);
	return err;
}