Lines Matching +full:spe +full:- +full:pmu
1 // SPDX-License-Identifier: GPL-2.0
3 * Arm Statistical Profiling Extensions (SPE) support
4 * Copyright (c) 2017-2018, Arm Ltd.
28 #include "thread-stack.h"
30 #include "util/synthetic-events.h"
32 #include "arm-spe.h"
33 #include "arm-spe-decoder/arm-spe-decoder.h"
34 #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
73 struct arm_spe *spe;
89 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
99 ". ... ARM SPE data: size %zu bytes\n",
124 len -= pkt_len;
128 static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
132 arm_spe_dump(spe, buf, len);
138 struct auxtrace_buffer *buffer = speq->buffer;
139 struct auxtrace_buffer *old_buffer = speq->old_buffer;
142 queue = &speq->spe->queues.queue_array[speq->queue_nr];
149 b->len = 0;
153 speq->buffer = buffer;
156 if (!buffer->data) {
158 int fd = perf_data__fd(speq->spe->session->data);
160 buffer->data = auxtrace_buffer__get_data(buffer, fd);
161 if (!buffer->data)
162 return -ENOMEM;
165 b->len = buffer->size;
166 b->buf = buffer->data;
168 if (b->len) {
171 speq->old_buffer = buffer;
180 static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
190 speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
191 if (!speq->event_buf)
194 speq->spe = spe;
195 speq->queue_nr = queue_nr;
196 speq->pid = -1;
197 speq->tid = -1;
198 speq->cpu = -1;
205 speq->decoder = arm_spe_decoder_new(&params);
206 if (!speq->decoder)
212 zfree(&speq->event_buf);
218 static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
220 return ip >= spe->kernel_start ?
225 static void arm_spe_prep_sample(struct arm_spe *spe,
230 struct arm_spe_record *record = &speq->decoder->record;
232 if (!spe->timeless_decoding)
233 sample->time = speq->timestamp;
235 sample->ip = record->from_ip;
236 sample->cpumode = arm_spe_cpumode(spe, sample->ip);
237 sample->pid = speq->pid;
238 sample->tid = speq->tid;
239 sample->addr = record->to_ip;
240 sample->period = 1;
241 sample->cpu = speq->cpu;
243 event->sample.header.type = PERF_RECORD_SAMPLE;
244 event->sample.header.misc = sample->cpumode;
245 event->sample.header.size = sizeof(struct perf_event_header);
250 event->header.size = perf_event__sample_event_size(sample, type, 0);
255 arm_spe_deliver_synth_event(struct arm_spe *spe,
262 if (spe->synth_opts.inject) {
263 ret = arm_spe__inject_event(event, sample, spe->sample_type);
268 ret = perf_session__deliver_synth_event(spe->session, event, sample);
270 pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
279 struct arm_spe *spe = speq->spe;
280 union perf_event *event = speq->event_buf;
283 arm_spe_prep_sample(spe, speq, event, &sample);
288 return arm_spe_deliver_synth_event(spe, speq, event, &sample);
293 const struct arm_spe_record *record = &speq->decoder->record;
294 struct arm_spe *spe = speq->spe;
297 if (spe->sample_flc) {
298 if (record->type & ARM_SPE_L1D_MISS) {
300 speq, spe->l1d_miss_id);
305 if (record->type & ARM_SPE_L1D_ACCESS) {
307 speq, spe->l1d_access_id);
313 if (spe->sample_llc) {
314 if (record->type & ARM_SPE_LLC_MISS) {
316 speq, spe->llc_miss_id);
321 if (record->type & ARM_SPE_LLC_ACCESS) {
323 speq, spe->llc_access_id);
329 if (spe->sample_tlb) {
330 if (record->type & ARM_SPE_TLB_MISS) {
332 speq, spe->tlb_miss_id);
337 if (record->type & ARM_SPE_TLB_ACCESS) {
339 speq, spe->tlb_access_id);
345 if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
347 spe->branch_miss_id);
352 if (spe->sample_remote_access &&
353 (record->type & ARM_SPE_REMOTE_ACCESS)) {
355 spe->remote_access_id);
365 struct arm_spe *spe = speq->spe;
368 if (!spe->kernel_start)
369 spe->kernel_start = machine__kernel_start(spe->machine);
372 ret = arm_spe_decode(speq->decoder);
379 * Error is detected when decoding SPE trace data, continue to
389 if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
390 *timestamp = speq->timestamp;
398 static int arm_spe__setup_queue(struct arm_spe *spe,
402 struct arm_spe_queue *speq = queue->priv;
405 if (list_empty(&queue->head) || speq)
408 speq = arm_spe__alloc_queue(spe, queue_nr);
411 return -ENOMEM;
413 queue->priv = speq;
415 if (queue->cpu != -1)
416 speq->cpu = queue->cpu;
418 if (!speq->on_heap) {
421 if (spe->timeless_decoding)
425 ret = arm_spe_decode(speq->decoder);
433 record = &speq->decoder->record;
435 speq->timestamp = record->timestamp;
436 ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
439 speq->on_heap = true;
445 static int arm_spe__setup_queues(struct arm_spe *spe)
450 for (i = 0; i < spe->queues.nr_queues; i++) {
451 ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
459 static int arm_spe__update_queues(struct arm_spe *spe)
461 if (spe->queues.new_data) {
462 spe->queues.new_data = false;
463 return arm_spe__setup_queues(spe);
469 static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
472 struct evlist *evlist = spe->session->evlist;
480 if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
487 static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
490 struct arm_spe_queue *speq = queue->priv;
493 tid = machine__get_current_tid(spe->machine, speq->cpu);
494 if (tid != -1) {
495 speq->tid = tid;
496 thread__zput(speq->thread);
498 speq->tid = queue->tid;
500 if ((!speq->thread) && (speq->tid != -1)) {
501 speq->thread = machine__find_thread(spe->machine, -1,
502 speq->tid);
505 if (speq->thread) {
506 speq->pid = speq->thread->pid_;
507 if (queue->cpu == -1)
508 speq->cpu = speq->thread->cpu;
512 static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
522 if (!spe->heap.heap_cnt)
525 if (spe->heap.heap_array[0].ordinal >= timestamp)
528 queue_nr = spe->heap.heap_array[0].queue_nr;
529 queue = &spe->queues.queue_array[queue_nr];
530 speq = queue->priv;
532 auxtrace_heap__pop(&spe->heap);
534 if (spe->heap.heap_cnt) {
535 ts = spe->heap.heap_array[0].ordinal + 1;
542 arm_spe_set_pid_tid_cpu(spe, queue);
546 auxtrace_heap__add(&spe->heap, queue_nr, ts);
551 ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
555 speq->on_heap = false;
562 static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
565 struct auxtrace_queues *queues = &spe->queues;
569 for (i = 0; i < queues->nr_queues; i++) {
570 struct auxtrace_queue *queue = &spe->queues.queue_array[i];
571 struct arm_spe_queue *speq = queue->priv;
573 if (speq && (tid == -1 || speq->tid == tid)) {
574 speq->time = time_;
575 arm_spe_set_pid_tid_cpu(spe, queue);
589 struct arm_spe *spe = container_of(session->auxtrace,
595 if (!tool->ordered_events) {
596 pr_err("SPE trace requires ordered events\n");
597 return -EINVAL;
600 if (sample->time && (sample->time != (u64) -1))
601 timestamp = sample->time;
605 if (timestamp || spe->timeless_decoding) {
606 err = arm_spe__update_queues(spe);
611 if (spe->timeless_decoding) {
612 if (event->header.type == PERF_RECORD_EXIT) {
613 err = arm_spe_process_timeless_queues(spe,
614 event->fork.tid,
615 sample->time);
618 if (event->header.type == PERF_RECORD_EXIT) {
619 err = arm_spe_process_queues(spe, timestamp);
632 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
635 if (!spe->data_queued) {
638 int fd = perf_data__fd(session->data);
641 if (perf_data__is_pipe(session->data)) {
645 if (data_offset == -1)
646 return -errno;
649 err = auxtrace_queues__add_event(&spe->queues, session, event,
657 arm_spe_dump_event(spe, buffer->data,
658 buffer->size);
670 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
677 if (!tool->ordered_events)
678 return -EINVAL;
680 ret = arm_spe__update_queues(spe);
684 if (spe->timeless_decoding)
685 return arm_spe_process_timeless_queues(spe, -1,
686 MAX_TIMESTAMP - 1);
688 return arm_spe_process_queues(spe, MAX_TIMESTAMP);
697 thread__zput(speq->thread);
698 arm_spe_decoder_free(speq->decoder);
699 zfree(&speq->event_buf);
705 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
707 struct auxtrace_queues *queues = &spe->queues;
710 for (i = 0; i < queues->nr_queues; i++) {
711 arm_spe_free_queue(queues->queue_array[i].priv);
712 queues->queue_array[i].priv = NULL;
719 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
722 auxtrace_heap__free(&spe->heap);
724 session->auxtrace = NULL;
725 free(spe);
731 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
733 return evsel->core.attr.type == spe->pmu_type;
737 [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n",
761 return perf_session__deliver_synth_event(arm_spe_synth->session,
783 if (evsel->core.id && evsel->core.id[0] == id) {
784 if (evsel->name)
785 zfree(&evsel->name);
786 evsel->name = strdup(name);
793 arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
795 struct evlist *evlist = session->evlist;
803 if (evsel->core.attr.type == spe->pmu_type) {
810 pr_debug("No selected events with SPE trace data\n");
817 attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
820 if (spe->timeless_decoding)
825 spe->sample_type = attr.sample_type;
827 attr.exclude_user = evsel->core.attr.exclude_user;
828 attr.exclude_kernel = evsel->core.attr.exclude_kernel;
829 attr.exclude_hv = evsel->core.attr.exclude_hv;
830 attr.exclude_host = evsel->core.attr.exclude_host;
831 attr.exclude_guest = evsel->core.attr.exclude_guest;
832 attr.sample_id_all = evsel->core.attr.sample_id_all;
833 attr.read_format = evsel->core.attr.read_format;
836 id = evsel->core.id[0] + 1000000000;
841 if (spe->synth_opts.flc) {
842 spe->sample_flc = true;
848 spe->l1d_miss_id = id;
849 arm_spe_set_event_name(evlist, id, "l1d-miss");
856 spe->l1d_access_id = id;
857 arm_spe_set_event_name(evlist, id, "l1d-access");
861 if (spe->synth_opts.llc) {
862 spe->sample_llc = true;
868 spe->llc_miss_id = id;
869 arm_spe_set_event_name(evlist, id, "llc-miss");
876 spe->llc_access_id = id;
877 arm_spe_set_event_name(evlist, id, "llc-access");
881 if (spe->synth_opts.tlb) {
882 spe->sample_tlb = true;
888 spe->tlb_miss_id = id;
889 arm_spe_set_event_name(evlist, id, "tlb-miss");
896 spe->tlb_access_id = id;
897 arm_spe_set_event_name(evlist, id, "tlb-access");
901 if (spe->synth_opts.branches) {
902 spe->sample_branch = true;
908 spe->branch_miss_id = id;
909 arm_spe_set_event_name(evlist, id, "branch-miss");
913 if (spe->synth_opts.remote_access) {
914 spe->sample_remote_access = true;
920 spe->remote_access_id = id;
921 arm_spe_set_event_name(evlist, id, "remote-access");
931 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
933 struct arm_spe *spe;
936 if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
938 return -EINVAL;
940 spe = zalloc(sizeof(struct arm_spe));
941 if (!spe)
942 return -ENOMEM;
944 err = auxtrace_queues__init(&spe->queues);
948 spe->session = session;
949 spe->machine = &session->machines.host; /* No kvm support */
950 spe->auxtrace_type = auxtrace_info->type;
951 spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
953 spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
954 spe->auxtrace.process_event = arm_spe_process_event;
955 spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
956 spe->auxtrace.flush_events = arm_spe_flush;
957 spe->auxtrace.free_events = arm_spe_free_events;
958 spe->auxtrace.free = arm_spe_free;
959 spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
960 session->auxtrace = &spe->auxtrace;
962 arm_spe_print_info(&auxtrace_info->priv[0]);
967 if (session->itrace_synth_opts && session->itrace_synth_opts->set)
968 spe->synth_opts = *session->itrace_synth_opts;
970 itrace_synth_opts__set_default(&spe->synth_opts, false);
972 err = arm_spe_synth_events(spe, session);
976 err = auxtrace_queues__process_index(&spe->queues, session);
980 if (spe->queues.populated)
981 spe->data_queued = true;
986 auxtrace_queues__free(&spe->queues);
987 session->auxtrace = NULL;
989 free(spe);