// SPDX-License-Identifier: GPL-2.0
/*
 * Arm Statistical Profiling Extensions (SPE) support
 * Copyright (c) 2017-2018, Arm Ltd.
 */

#include <byteswap.h>
#include <endian.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>
#include <stdlib.h>
#include <unistd.h>

#include "auxtrace.h"
#include "color.h"
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
#include "machine.h"
#include "session.h"
#include "symbol.h"
#include "thread.h"
#include "thread-stack.h"
#include "tsc.h"
#include "tool.h"
#include "util/synthetic-events.h"

#include "arm-spe.h"
#include "arm-spe-decoder/arm-spe-decoder.h"
#include "arm-spe-decoder/arm-spe-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)

struct arm_spe {
	struct auxtrace			auxtrace;
	struct auxtrace_queues		queues;
	struct auxtrace_heap		heap;
	struct itrace_synth_opts	synth_opts;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	u32				pmu_type;

	struct perf_tsc_conversion	tc;

	u8				timeless_decoding;
	u8				data_queued;

	u64				sample_type;
	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;

	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_miss_id;
	u64				remote_access_id;
	u64				memory_id;

	u64				kernel_start;

	unsigned long			num_events;
};

struct arm_spe_queue {
	struct arm_spe			*spe;
	unsigned int			queue_nr;
	struct auxtrace_buffer		*buffer;
	struct auxtrace_buffer		*old_buffer;
	union perf_event		*event_buf;
	bool				on_heap;
	bool				done;
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	u64				time;
	u64				timestamp;
	struct thread			*thread;
};
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
			 unsigned char *buf, size_t len)
{
	struct arm_spe_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[ARM_SPE_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... ARM SPE data: size %zu bytes\n",
		      len);

	while (len) {
		ret = arm_spe_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08zx: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = arm_spe_pkt_desc(&packet, desc,
					       ARM_SPE_PKT_DESC_MAX);
			if (!ret)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	printf(".\n");
	arm_spe_dump(spe, buf, len);
}

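/*
 * Note on buffer lifetime (a summary of the existing logic below, for
 * clarity): the decoder consumes the returned buffer in place, so the
 * previous buffer ("old_buffer") is only dropped once the next buffer
 * holding data has been fetched; buffers that turn out to be empty are
 * dropped immediately and the function recurses to fetch the next one.
 */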
static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
{
	struct arm_spe_queue *speq = data;
	struct auxtrace_buffer *buffer = speq->buffer;
	struct auxtrace_buffer *old_buffer = speq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &speq->spe->queues.queue_array[speq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	speq->buffer = buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(speq->spe->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	b->len = buffer->size;
	b->buf = buffer->data;

	if (b->len) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		speq->old_buffer = buffer;
	} else {
		auxtrace_buffer__drop_data(buffer);
		return arm_spe_get_trace(b, data);
	}

	return 0;
}

static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
						  unsigned int queue_nr)
{
	struct arm_spe_params params = { .get_trace = 0, };
	struct arm_spe_queue *speq;

	speq = zalloc(sizeof(*speq));
	if (!speq)
		return NULL;

	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!speq->event_buf)
		goto out_free;

	speq->spe = spe;
	speq->queue_nr = queue_nr;
	speq->pid = -1;
	speq->tid = -1;
	speq->cpu = -1;

	/* params set */
	params.get_trace = arm_spe_get_trace;
	params.data = speq;

	/* create new decoder */
	speq->decoder = arm_spe_decoder_new(&params);
	if (!speq->decoder)
		goto out_free;

	return speq;

out_free:
	zfree(&speq->event_buf);
	free(speq);

	return NULL;
}

static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
{
	return ip >= spe->kernel_start ?
		PERF_RECORD_MISC_KERNEL :
		PERF_RECORD_MISC_USER;
}

static void arm_spe_prep_sample(struct arm_spe *spe,
				struct arm_spe_queue *speq,
				union perf_event *event,
				struct perf_sample *sample)
{
	struct arm_spe_record *record = &speq->decoder->record;

	if (!spe->timeless_decoding)
		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);

	sample->ip = record->from_ip;
	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
	sample->pid = speq->pid;
	sample->tid = speq->tid;
	sample->period = 1;
	sample->cpu = speq->cpu;

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = sample->cpumode;
	event->sample.header.size = sizeof(struct perf_event_header);
}

static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}

static inline int
arm_spe_deliver_synth_event(struct arm_spe *spe,
			    struct arm_spe_queue *speq __maybe_unused,
			    union perf_event *event,
			    struct perf_sample *sample)
{
	int ret;

	if (spe->synth_opts.inject) {
		ret = arm_spe__inject_event(event, sample, spe->sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(spe->session, event, sample);
	if (ret)
		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);

	return ret;
}

static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
				     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->virt_addr;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
					u64 spe_events_id)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}

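/*
 * Worked example for the encoding below (illustrative values, not taken
 * from a real trace): a load record with
 * type == (ARM_SPE_L1D_MISS | ARM_SPE_LLC_ACCESS | ARM_SPE_TLB_ACCESS)
 * yields:
 *
 *	data_src.mem_op   = PERF_MEM_OP_LOAD;
 *	data_src.mem_lvl  = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
 *	data_src.mem_dtlb = PERF_MEM_TLB_WK | PERF_MEM_TLB_HIT;
 *
 * The LLC branch takes precedence over L1D, so the L1 miss is not
 * reflected in mem_lvl; tools such as "perf mem report" decode these
 * bits from PERF_SAMPLE_DATA_SRC.
 */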
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
{
	union perf_mem_data_src	data_src = { 0 };

	if (record->op == ARM_SPE_LD)
		data_src.mem_op = PERF_MEM_OP_LOAD;
	else if (record->op == ARM_SPE_ST)
		data_src.mem_op = PERF_MEM_OP_STORE;
	else
		return 0;

	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
		data_src.mem_lvl = PERF_MEM_LVL_L3;

		if (record->type & ARM_SPE_LLC_MISS)
			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
		data_src.mem_lvl = PERF_MEM_LVL_L1;

		if (record->type & ARM_SPE_L1D_MISS)
			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
	}

	if (record->type & ARM_SPE_REMOTE_ACCESS)
		data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;

	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
		data_src.mem_dtlb = PERF_MEM_TLB_WK;

		if (record->type & ARM_SPE_TLB_MISS)
			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
		else
			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
	}

	return data_src.val;
}

static int arm_spe_sample(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;
	struct arm_spe *spe = speq->spe;
	u64 data_src;
	int err;

	data_src = arm_spe__synth_data_source(record);

	if (spe->sample_flc) {
		if (record->type & ARM_SPE_L1D_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_L1D_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
		err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
		if (err)
			return err;
	}

	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
						data_src);
		if (err)
			return err;
	}

	/*
	 * When data_src is zero, the record is not a memory operation;
	 * skip synthesizing a memory sample in that case.
	 */
	if (spe->sample_memory && data_src) {
		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
		if (err)
			return err;
	}

	return 0;
}

static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		/*
		 * The usual logic is to decode the packets first and then
		 * synthesize a sample from the resulting record; but here
		 * the flow is reversed: arm_spe_sample() is called to
		 * synthesize a sample prior to arm_spe_decode().
		 *
		 * There are two reasons for this:
		 * 1. When the queue is set up in arm_spe__setup_queue(),
		 *    trace data has already been decoded and a record
		 *    generated, but no sample was synthesized for it; so
		 *    the sample for that leftover record is synthesized
		 *    here first.
		 * 2. After decoding trace data, the record's timestamp is
		 *    compared with the timestamp of the coming perf event;
		 *    if the record is later, bail out and push the record
		 *    onto the auxtrace heap, so that synthesizing its
		 *    sample is deferred to the next call of this function.
		 *    This correlates samples between Arm SPE trace data
		 *    and other perf events with correct time ordering.
		 */
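		/*
		 * For example (hypothetical timestamps): if the pending
		 * record carries timestamp 1000 while the coming perf
		 * event carries timestamp 900, the check at the bottom
		 * of this loop bails out and the perf event is delivered
		 * first; this record's sample is then synthesized on the
		 * next call, preserving global time order.
		 */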
		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * If an error is detected while decoding the SPE trace
		 * data, continue with the next chunk of trace data to
		 * find more records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the timestamp of the queue is later than the
		 * timestamp of the coming perf event, bail out so the
		 * perf event can be processed first.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}

static int arm_spe__setup_queue(struct arm_spe *spe,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	if (list_empty(&queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		if (spe->timeless_decoding)
			return 0;

retry:
		ret = arm_spe_decode(speq->decoder);

		if (!ret)
			return 0;

		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}

static int arm_spe__setup_queues(struct arm_spe *spe)
{
	unsigned int i;
	int ret;

	for (i = 0; i < spe->queues.nr_queues; i++) {
		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

static int arm_spe__update_queues(struct arm_spe *spe)
{
	if (spe->queues.new_data) {
		spe->queues.new_data = false;
		return arm_spe__setup_queues(spe);
	}

	return 0;
}

static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
{
	struct evsel *evsel;
	struct evlist *evlist = spe->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Loop through the event list; decoding is "timeless" only if
	 * no event has the time bit (PERF_SAMPLE_TIME) set.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}

static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
				    struct auxtrace_queue *queue)
{
	struct arm_spe_queue *speq = queue->priv;
	pid_t tid;

	tid = machine__get_current_tid(spe->machine, speq->cpu);
	if (tid != -1) {
		speq->tid = tid;
		thread__zput(speq->thread);
	} else
		speq->tid = queue->tid;

	if ((!speq->thread) && (speq->tid != -1)) {
		speq->thread = machine__find_thread(spe->machine, -1,
						    speq->tid);
	}

	if (speq->thread) {
		speq->pid = speq->thread->pid_;
		if (queue->cpu == -1)
			speq->cpu = speq->thread->cpu;
	}
}

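/*
 * Process queues in timestamp order up to the given timestamp. The
 * auxtrace heap acts as a min-heap keyed on each queue's next record
 * time. For example (hypothetical numbers): with queue 0 pending at
 * t=100 and queue 1 at t=150, queue 0 is popped and decoded until its
 * next record reaches t=151 (the other queue's ordinal + 1) or the
 * limit, whichever is smaller, then pushed back onto the heap; this
 * interleaves per-CPU queues in global time order.
 */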
static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, &ts);
		if (ret < 0) {
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			speq->on_heap = false;
		}
	}

	return 0;
}

static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
					   u64 time_)
{
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
		struct arm_spe_queue *speq = queue->priv;

		if (speq && (tid == -1 || speq->tid == tid)) {
			speq->time = time_;
			arm_spe_set_pid_tid_cpu(spe, queue);
			arm_spe_run_decoder(speq, &ts);
		}
	}
	return 0;
}

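/*
 * Note on time domains: perf event times arrive in perf-clock
 * nanoseconds, whereas SPE records carry raw counter values; so
 * sample->time is converted back to the counter domain with
 * perf_time_to_tsc() before being compared against queue timestamps
 * in arm_spe_process_queues().
 */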
static int arm_spe_process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct arm_spe *spe = container_of(session->auxtrace,
			struct arm_spe, auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("SPE trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && (sample->time != (u64) -1))
		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
	else
		timestamp = 0;

	if (timestamp || spe->timeless_decoding) {
		err = arm_spe__update_queues(spe);
		if (err)
			return err;
	}

	if (spe->timeless_decoding) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_timeless_queues(spe,
					event->fork.tid,
					sample->time);
		}
	} else if (timestamp) {
		err = arm_spe_process_queues(spe, timestamp);
	}

	return err;
}

static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);

	if (!spe->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&spe->queues, session, event,
						 data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now that we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				arm_spe_dump_event(spe, buffer->data,
						   buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

static int arm_spe_flush(struct perf_session *session __maybe_unused,
			 struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
			auxtrace);
	int ret;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = arm_spe__update_queues(spe);
	if (ret < 0)
		return ret;

	if (spe->timeless_decoding)
		return arm_spe_process_timeless_queues(spe, -1,
				MAX_TIMESTAMP - 1);

	return arm_spe_process_queues(spe, MAX_TIMESTAMP);
}

static void arm_spe_free_queue(void *priv)
{
	struct arm_spe_queue *speq = priv;

	if (!speq)
		return;
	thread__zput(speq->thread);
	arm_spe_decoder_free(speq->decoder);
	zfree(&speq->event_buf);
	free(speq);
}

static void arm_spe_free_events(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		arm_spe_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	auxtrace_queues__free(queues);
}

static void arm_spe_free(struct perf_session *session)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					   auxtrace);

	auxtrace_heap__free(&spe->heap);
	arm_spe_free_events(session);
	session->auxtrace = NULL;
	free(spe);
}

static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
				      struct evsel *evsel)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);

	return evsel->core.attr.type == spe->pmu_type;
}

static const char * const arm_spe_info_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
};

static void arm_spe_print_info(__u64 *arr)
{
	if (!dump_trace)
		return;

	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
}

struct arm_spe_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

static int arm_spe_event_synth(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine __maybe_unused)
{
	struct arm_spe_synth *arm_spe_synth =
		      container_of(tool, struct arm_spe_synth, dummy_tool);

	return perf_session__deliver_synth_event(arm_spe_synth->session,
						 event, NULL);
}

static int arm_spe_synth_event(struct perf_session *session,
			       struct perf_event_attr *attr, u64 id)
{
	struct arm_spe_synth arm_spe_synth;

	memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
	arm_spe_synth.session = session;

	return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
					   &id, arm_spe_event_synth);
}

static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
				   const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.id && evsel->core.id[0] == id) {
			if (evsel->name)
				zfree(&evsel->name);
			evsel->name = strdup(name);
			break;
		}
	}
}

static int
arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == spe->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with SPE trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
			    PERF_SAMPLE_ADDR;
	if (spe->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	spe->sample_type = attr.sample_type;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->core.id[0] + 1000000000;

	if (!id)
		id = 1;
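
	/*
	 * For example (hypothetical value): if the evsel's first id is
	 * 0x12, the synthesized events below get ids starting at
	 * 1000000000 + 0x12, one per synthesized event, which is
	 * unlikely to collide with ids the kernel has already allocated.
	 */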

	if (spe->synth_opts.flc) {
		spe->sample_flc = true;

		/* Level 1 data cache miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_miss_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-miss");
		id += 1;

		/* Level 1 data cache access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->l1d_access_id = id;
		arm_spe_set_event_name(evlist, id, "l1d-access");
		id += 1;
	}

	if (spe->synth_opts.llc) {
		spe->sample_llc = true;

		/* Last level cache miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_miss_id = id;
		arm_spe_set_event_name(evlist, id, "llc-miss");
		id += 1;

		/* Last level cache access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->llc_access_id = id;
		arm_spe_set_event_name(evlist, id, "llc-access");
		id += 1;
	}

	if (spe->synth_opts.tlb) {
		spe->sample_tlb = true;

		/* TLB miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_miss_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-miss");
		id += 1;

		/* TLB access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->tlb_access_id = id;
		arm_spe_set_event_name(evlist, id, "tlb-access");
		id += 1;
	}

	if (spe->synth_opts.branches) {
		spe->sample_branch = true;

		/* Branch miss */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->branch_miss_id = id;
		arm_spe_set_event_name(evlist, id, "branch-miss");
		id += 1;
	}

	if (spe->synth_opts.remote_access) {
		spe->sample_remote_access = true;

		/* Remote access */
		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->remote_access_id = id;
		arm_spe_set_event_name(evlist, id, "remote-access");
		id += 1;
	}

	if (spe->synth_opts.mem) {
		spe->sample_memory = true;

		err = arm_spe_synth_event(session, &attr, id);
		if (err)
			return err;
		spe->memory_id = id;
		arm_spe_set_event_name(evlist, id, "memory");
	}

	return 0;
}

int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
	struct perf_record_time_conv *tc = &session->time_conv;
	struct arm_spe *spe;
	int err;

	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					min_sz)
		return -EINVAL;

	spe = zalloc(sizeof(struct arm_spe));
	if (!spe)
		return -ENOMEM;

	err = auxtrace_queues__init(&spe->queues);
	if (err)
		goto err_free;

	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];

	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);

	/*
	 * The synthesized event PERF_RECORD_TIME_CONV has already been
	 * handled ahead of this, and the parameters for the hardware
	 * clock are stored in the session context. Pass these parameters
	 * to the struct perf_tsc_conversion in "spe->tc", which is used
	 * later for conversion between the clock counter and timestamps.
	 *
	 * For backward compatibility, copy the fields starting from
	 * "time_cycles" only if they are contained in the event.
	 */
	spe->tc.time_shift = tc->time_shift;
	spe->tc.time_mult = tc->time_mult;
	spe->tc.time_zero = tc->time_zero;

	if (event_contains(*tc, time_cycles)) {
		spe->tc.time_cycles = tc->time_cycles;
		spe->tc.time_mask = tc->time_mask;
		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
		spe->tc.cap_user_time_short = tc->cap_user_time_short;
	}
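
	/*
	 * With these parameters, tsc_to_perf_time() computes roughly
	 * the following (a sketch of the conversion; see
	 * tools/perf/util/tsc.c for the authoritative version):
	 *
	 *	quot = cyc >> time_shift;
	 *	rem  = cyc & (((u64)1 << time_shift) - 1);
	 *	time = time_zero + quot * time_mult +
	 *	       ((rem * time_mult) >> time_shift);
	 */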

	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(&auxtrace_info->priv[0]);

	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
		spe->synth_opts = *session->itrace_synth_opts;
	else
		itrace_synth_opts__set_default(&spe->synth_opts, false);

	err = arm_spe_synth_events(spe, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&spe->queues, session);
	if (err)
		goto err_free_queues;

	if (spe->queues.populated)
		spe->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&spe->queues);
	session->auxtrace = NULL;
err_free:
	free(spe);
	return err;
}