1 /*
2 * Copyright © 2021 Google, Inc.
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "fd_pps_driver.h"
8
9 #include <cstring>
10 #include <iostream>
11 #include <perfetto.h>
12
13 #include "pps/pps.h"
14 #include "pps/pps_algorithm.h"
15
16 namespace pps
17 {
18
/* Divide a by b as doubles, yielding 0 instead of dividing by zero. */
double
safe_div(uint64_t a, uint64_t b)
{
   return (b != 0) ? a / static_cast<double>(b) : 0.0;
}
27
/* Express a as a percentage of b.
 *
 * Bogus samples (a zero denominator, or a numerator larger than the
 * denominator) are reported as 0 so that the timeline never shows
 * values above 100%.
 */
float
percent(uint64_t a, uint64_t b)
{
   const bool bogus = (b == 0) || (a > b);
   if (bogus)
      return 0;

   const double ratio = a / static_cast<double>(b);
   return 100.f * ratio;
}
39
40 bool
is_dump_perfcnt_preemptible() const41 FreedrenoDriver::is_dump_perfcnt_preemptible() const
42 {
43 return false;
44 }
45
46 uint64_t
get_min_sampling_period_ns()47 FreedrenoDriver::get_min_sampling_period_ns()
48 {
49 return 100000;
50 }
51
52 /*
53 TODO this sees like it would be largely the same for a5xx as well
54 (ie. same countable names)..
55 */
56 void
setup_a6xx_counters()57 FreedrenoDriver::setup_a6xx_counters()
58 {
59 /* TODO is there a reason to want more than one group? */
60 CounterGroup group = {};
61 group.name = "counters";
62 groups.clear();
63 counters.clear();
64 countables.clear();
65 enabled_counters.clear();
66 groups.emplace_back(std::move(group));
67
68 /*
69 * Create the countables that we'll be using.
70 */
71
72 auto PERF_CP_ALWAYS_COUNT = countable("PERF_CP_ALWAYS_COUNT");
73 auto PERF_CP_BUSY_CYCLES = countable("PERF_CP_BUSY_CYCLES");
74 auto PERF_RB_3D_PIXELS = countable("PERF_RB_3D_PIXELS");
75 auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES");
76 auto PERF_TP_L1_CACHELINE_REQUESTS = countable("PERF_TP_L1_CACHELINE_REQUESTS");
77
78 auto PERF_TP_OUTPUT_PIXELS = countable("PERF_TP_OUTPUT_PIXELS");
79 auto PERF_TP_OUTPUT_PIXELS_ANISO = countable("PERF_TP_OUTPUT_PIXELS_ANISO");
80 auto PERF_TP_OUTPUT_PIXELS_BILINEAR = countable("PERF_TP_OUTPUT_PIXELS_BILINEAR");
81 auto PERF_TP_OUTPUT_PIXELS_POINT = countable("PERF_TP_OUTPUT_PIXELS_POINT");
82 auto PERF_TP_OUTPUT_PIXELS_ZERO_LOD = countable("PERF_TP_OUTPUT_PIXELS_ZERO_LOD");
83
84 auto PERF_TSE_INPUT_PRIM = countable("PERF_TSE_INPUT_PRIM");
85 auto PERF_TSE_CLIPPED_PRIM = countable("PERF_TSE_CLIPPED_PRIM");
86 auto PERF_TSE_TRIVAL_REJ_PRIM = countable("PERF_TSE_TRIVAL_REJ_PRIM");
87 auto PERF_TSE_OUTPUT_VISIBLE_PRIM = countable("PERF_TSE_OUTPUT_VISIBLE_PRIM");
88
89 auto PERF_SP_BUSY_CYCLES = countable("PERF_SP_BUSY_CYCLES");
90 auto PERF_SP_ALU_WORKING_CYCLES = countable("PERF_SP_ALU_WORKING_CYCLES");
91 auto PERF_SP_EFU_WORKING_CYCLES = countable("PERF_SP_EFU_WORKING_CYCLES");
92 auto PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_EFU_INSTRUCTIONS");
93 auto PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS");
94 auto PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_TEX_INSTRUCTIONS");
95 auto PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_EFU_INSTRUCTIONS");
96 auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS");
97 auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS");
98 auto PERF_SP_STALL_CYCLES_TP = countable("PERF_SP_STALL_CYCLES_TP");
99 auto PERF_SP_ANY_EU_WORKING_FS_STAGE = countable("PERF_SP_ANY_EU_WORKING_FS_STAGE");
100 auto PERF_SP_ANY_EU_WORKING_VS_STAGE = countable("PERF_SP_ANY_EU_WORKING_VS_STAGE");
101 auto PERF_SP_ANY_EU_WORKING_CS_STAGE = countable("PERF_SP_ANY_EU_WORKING_CS_STAGE");
102
103 auto PERF_UCHE_STALL_CYCLES_ARBITER = countable("PERF_UCHE_STALL_CYCLES_ARBITER");
104 auto PERF_UCHE_VBIF_READ_BEATS_TP = countable("PERF_UCHE_VBIF_READ_BEATS_TP");
105 auto PERF_UCHE_VBIF_READ_BEATS_VFD = countable("PERF_UCHE_VBIF_READ_BEATS_VFD");
106 auto PERF_UCHE_VBIF_READ_BEATS_SP = countable("PERF_UCHE_VBIF_READ_BEATS_SP");
107 auto PERF_UCHE_READ_REQUESTS_TP = countable("PERF_UCHE_READ_REQUESTS_TP");
108
109 auto PERF_PC_STALL_CYCLES_VFD = countable("PERF_PC_STALL_CYCLES_VFD");
110 auto PERF_PC_VS_INVOCATIONS = countable("PERF_PC_VS_INVOCATIONS");
111 auto PERF_PC_VERTEX_HITS = countable("PERF_PC_VERTEX_HITS");
112
113 auto PERF_HLSQ_QUADS = countable("PERF_HLSQ_QUADS"); /* Quads (fragments / 4) produced */
114
115 auto PERF_CP_NUM_PREEMPTIONS = countable("PERF_CP_NUM_PREEMPTIONS");
116 auto PERF_CP_PREEMPTION_REACTION_DELAY = countable("PERF_CP_PREEMPTION_REACTION_DELAY");
117
118 /* TODO: resolve() tells there is no PERF_CMPDECMP_VBIF_READ_DATA */
119 // auto PERF_CMPDECMP_VBIF_READ_DATA = countable("PERF_CMPDECMP_VBIF_READ_DATA");
120
121 /*
122 * And then setup the derived counters that we are exporting to
123 * pps based on the captured countable values.
124 *
125 * We try to expose the same counters as blob:
126 * https://gpuinspector.dev/docs/gpu-counters/qualcomm
127 */
128
129 counter("GPU Frequency", Counter::Units::Hertz, [=]() {
130 return PERF_CP_ALWAYS_COUNT / time;
131 }
132 );
133
134 counter("GPU % Utilization", Counter::Units::Percent, [=]() {
135 return percent(PERF_CP_BUSY_CYCLES / time, max_freq);
136 }
137 );
138
139 counter("TP L1 Cache Misses", Counter::Units::None, [=]() {
140 return PERF_TP_L1_CACHELINE_MISSES / time;
141 }
142 );
143
144 counter("Shader Core Utilization", Counter::Units::Percent, [=]() {
145 return percent(PERF_SP_BUSY_CYCLES / time, max_freq * info->num_sp_cores);
146 }
147 );
148
149 /* TODO: verify */
150 counter("(?) % Texture Fetch Stall", Counter::Units::Percent, [=]() {
151 return percent(PERF_SP_STALL_CYCLES_TP / time, max_freq * info->num_sp_cores);
152 }
153 );
154
155 /* TODO: verify */
156 counter("(?) % Vertex Fetch Stall", Counter::Units::Percent, [=]() {
157 return percent(PERF_PC_STALL_CYCLES_VFD / time, max_freq * info->num_sp_cores);
158 }
159 );
160
161 counter("L1 Texture Cache Miss Per Pixel", Counter::Units::None, [=]() {
162 return safe_div(PERF_TP_L1_CACHELINE_MISSES, PERF_HLSQ_QUADS * 4);
163 }
164 );
165
166 counter("% Texture L1 Miss", Counter::Units::Percent, [=]() {
167 return percent(PERF_TP_L1_CACHELINE_MISSES, PERF_TP_L1_CACHELINE_REQUESTS);
168 }
169 );
170
171 counter("% Texture L2 Miss", Counter::Units::Percent, [=]() {
172 return percent(PERF_UCHE_VBIF_READ_BEATS_TP / 2, PERF_UCHE_READ_REQUESTS_TP);
173 }
174 );
175
176 /* TODO: verify */
177 counter("(?) % Stalled on System Memory", Counter::Units::Percent, [=]() {
178 return percent(PERF_UCHE_STALL_CYCLES_ARBITER / time, max_freq * info->num_sp_cores);
179 }
180 );
181
182 counter("Pre-clipped Polygons / Second", Counter::Units::None, [=]() {
183 return PERF_TSE_INPUT_PRIM * (1.f / time);
184 }
185 );
186
187 counter("% Prims Trivially Rejected", Counter::Units::Percent, [=]() {
188 return percent(PERF_TSE_TRIVAL_REJ_PRIM, PERF_TSE_INPUT_PRIM);
189 }
190 );
191
192 counter("% Prims Clipped", Counter::Units::Percent, [=]() {
193 return percent(PERF_TSE_CLIPPED_PRIM, PERF_TSE_INPUT_PRIM);
194 }
195 );
196
197 counter("Average Vertices / Polygon", Counter::Units::None, [=]() {
198 return PERF_PC_VS_INVOCATIONS / PERF_TSE_INPUT_PRIM;
199 }
200 );
201
202 counter("Reused Vertices / Second", Counter::Units::None, [=]() {
203 return PERF_PC_VERTEX_HITS * (1.f / time);
204 }
205 );
206
207 counter("Average Polygon Area", Counter::Units::None, [=]() {
208 return safe_div(PERF_HLSQ_QUADS * 4, PERF_TSE_OUTPUT_VISIBLE_PRIM);
209 }
210 );
211
212 /* TODO: find formula */
213 // counter("% Shaders Busy", Counter::Units::Percent, [=]() {
214 // return 100.0 * 0;
215 // }
216 // );
217
218 counter("Vertices Shaded / Second", Counter::Units::None, [=]() {
219 return PERF_PC_VS_INVOCATIONS * (1.f / time);
220 }
221 );
222
223 counter("Fragments Shaded / Second", Counter::Units::None, [=]() {
224 return PERF_HLSQ_QUADS * 4 * (1.f / time);
225 }
226 );
227
228 counter("Vertex Instructions / Second", Counter::Units::None, [=]() {
229 return (PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS +
230 PERF_SP_VS_STAGE_EFU_INSTRUCTIONS) * (1.f / time);
231 }
232 );
233
234 counter("Fragment Instructions / Second", Counter::Units::None, [=]() {
235 return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
236 PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2 +
237 PERF_SP_FS_STAGE_EFU_INSTRUCTIONS) * (1.f / time);
238 }
239 );
240
241 counter("Fragment ALU Instructions / Sec (Full)", Counter::Units::None, [=]() {
242 return PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS * (1.f / time);
243 }
244 );
245
246 counter("Fragment ALU Instructions / Sec (Half)", Counter::Units::None, [=]() {
247 return PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS * (1.f / time);
248 }
249 );
250
251 counter("Fragment EFU Instructions / Second", Counter::Units::None, [=]() {
252 return PERF_SP_FS_STAGE_EFU_INSTRUCTIONS * (1.f / time);
253 }
254 );
255
256 counter("Textures / Vertex", Counter::Units::None, [=]() {
257 return safe_div(PERF_SP_VS_STAGE_TEX_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
258 }
259 );
260
261 counter("Textures / Fragment", Counter::Units::None, [=]() {
262 return safe_div(PERF_TP_OUTPUT_PIXELS, PERF_HLSQ_QUADS * 4);
263 }
264 );
265
266 counter("ALU / Vertex", Counter::Units::None, [=]() {
267 return safe_div(PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
268 }
269 );
270
271 counter("EFU / Vertex", Counter::Units::None, [=]() {
272 return safe_div(PERF_SP_VS_STAGE_EFU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
273 }
274 );
275
276 counter("ALU / Fragment", Counter::Units::None, [=]() {
277 return safe_div(PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
278 PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2, PERF_HLSQ_QUADS);
279 }
280 );
281
282 counter("EFU / Fragment", Counter::Units::None, [=]() {
283 return safe_div(PERF_SP_FS_STAGE_EFU_INSTRUCTIONS, PERF_HLSQ_QUADS);
284 }
285 );
286
287 counter("% Time Shading Vertices", Counter::Units::Percent, [=]() {
288 return percent(PERF_SP_ANY_EU_WORKING_VS_STAGE,
289 (PERF_SP_ANY_EU_WORKING_VS_STAGE +
290 PERF_SP_ANY_EU_WORKING_FS_STAGE +
291 PERF_SP_ANY_EU_WORKING_CS_STAGE));
292 }
293 );
294
295 counter("% Time Shading Fragments", Counter::Units::Percent, [=]() {
296 return percent(PERF_SP_ANY_EU_WORKING_FS_STAGE,
297 (PERF_SP_ANY_EU_WORKING_VS_STAGE +
298 PERF_SP_ANY_EU_WORKING_FS_STAGE +
299 PERF_SP_ANY_EU_WORKING_CS_STAGE));
300 }
301 );
302
303 counter("% Time Compute", Counter::Units::Percent, [=]() {
304 return percent(PERF_SP_ANY_EU_WORKING_CS_STAGE,
305 (PERF_SP_ANY_EU_WORKING_VS_STAGE +
306 PERF_SP_ANY_EU_WORKING_FS_STAGE +
307 PERF_SP_ANY_EU_WORKING_CS_STAGE));
308 }
309 );
310
311 counter("% Shader ALU Capacity Utilized", Counter::Units::Percent, [=]() {
312 return percent((PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS +
313 PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
314 PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / 64,
315 PERF_SP_BUSY_CYCLES);
316 }
317 );
318
319 counter("% Time ALUs Working", Counter::Units::Percent, [=]() {
320 return percent(PERF_SP_ALU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES);
321 }
322 );
323
324 counter("% Time EFUs Working", Counter::Units::Percent, [=]() {
325 return percent(PERF_SP_EFU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES);
326 }
327 );
328
329 counter("% Anisotropic Filtered", Counter::Units::Percent, [=]() {
330 return percent(PERF_TP_OUTPUT_PIXELS_ANISO, PERF_TP_OUTPUT_PIXELS);
331 }
332 );
333
334 counter("% Linear Filtered", Counter::Units::Percent, [=]() {
335 return percent(PERF_TP_OUTPUT_PIXELS_BILINEAR, PERF_TP_OUTPUT_PIXELS);
336 }
337 );
338
339 counter("% Nearest Filtered", Counter::Units::Percent, [=]() {
340 return percent(PERF_TP_OUTPUT_PIXELS_POINT, PERF_TP_OUTPUT_PIXELS);
341 }
342 );
343
344 counter("% Non-Base Level Textures", Counter::Units::Percent, [=]() {
345 return percent(PERF_TP_OUTPUT_PIXELS_ZERO_LOD, PERF_TP_OUTPUT_PIXELS);
346 }
347 );
348
349 /* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=63 */
350 // counter("Read Total (Bytes/sec)", Counter::Units::Byte, [=]() {
351 // return * (1.f / time);
352 // }
353 // );
354
355 /* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=84 */
356 // counter("Write Total (Bytes/sec)", Counter::Units::Byte, [=]() {
357 // return * (1.f / time);
358 // }
359 // );
360
361 /* Cannot get PERF_CMPDECMP_VBIF_READ_DATA countable */
362 // counter("Texture Memory Read BW (Bytes/Second)", Counter::Units::Byte, [=]() {
363 // return (PERF_CMPDECMP_VBIF_READ_DATA + PERF_UCHE_VBIF_READ_BEATS_TP) * (1.f / time);
364 // }
365 // );
366
367 /* TODO: verify */
368 counter("(?) Vertex Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() {
369 return PERF_UCHE_VBIF_READ_BEATS_VFD * 32 * (1.f / time);
370 }
371 );
372
373 /* TODO: verify */
374 counter("SP Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() {
375 return PERF_UCHE_VBIF_READ_BEATS_SP * 32 * (1.f / time);
376 }
377 );
378
379 counter("Avg Bytes / Fragment", Counter::Units::Byte, [=]() {
380 return safe_div(PERF_UCHE_VBIF_READ_BEATS_TP * 32, PERF_HLSQ_QUADS * 4);
381 }
382 );
383
384 counter("Avg Bytes / Vertex", Counter::Units::Byte, [=]() {
385 return safe_div(PERF_UCHE_VBIF_READ_BEATS_VFD * 32, PERF_PC_VS_INVOCATIONS);
386 }
387 );
388
389 counter("Preemptions / second", Counter::Units::None, [=]() {
390 return PERF_CP_NUM_PREEMPTIONS * (1.f / time);
391 }
392 );
393
394 counter("Avg Preemption Delay", Counter::Units::None, [=]() {
395 return PERF_CP_PREEMPTION_REACTION_DELAY * (1.f / time);
396 }
397 );
398 }
399
400 /**
401 * Generate an submit the cmdstream to configure the counter/countable
402 * muxing
403 */
404 void
configure_counters(bool reset,bool wait)405 FreedrenoDriver::configure_counters(bool reset, bool wait)
406 {
407 struct fd_submit *submit = fd_submit_new(pipe);
408 enum fd_ringbuffer_flags flags =
409 (enum fd_ringbuffer_flags)(FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
410 struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000, flags);
411
412 for (const auto &countable : countables)
413 countable.configure(ring, reset);
414
415 struct fd_fence *fence = fd_submit_flush(submit, -1, false);
416
417 fd_fence_flush(fence);
418 fd_fence_del(fence);
419
420 fd_ringbuffer_del(ring);
421 fd_submit_del(submit);
422
423 if (wait)
424 fd_pipe_wait(pipe, fence);
425 }
426
427 /**
428 * Read the current counter values and record the time.
429 */
430 void
collect_countables()431 FreedrenoDriver::collect_countables()
432 {
433 last_dump_ts = perfetto::base::GetBootTimeNs().count();
434
435 for (const auto &countable : countables)
436 countable.collect();
437 }
438
439 bool
init_perfcnt()440 FreedrenoDriver::init_perfcnt()
441 {
442 uint64_t val;
443
444 dev = fd_device_new(drm_device.fd);
445 pipe = fd_pipe_new2(dev, FD_PIPE_3D, 0);
446 dev_id = fd_pipe_dev_id(pipe);
447
448 if (fd_pipe_get_param(pipe, FD_MAX_FREQ, &val)) {
449 PERFETTO_FATAL("Could not get MAX_FREQ");
450 return false;
451 }
452 max_freq = val;
453
454 if (fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val)) {
455 PERFETTO_ILOG("Could not get SUSPEND_COUNT");
456 } else {
457 suspend_count = val;
458 has_suspend_count = true;
459 }
460
461 fd_pipe_set_param(pipe, FD_SYSPROF, 1);
462
463 perfcntrs = fd_perfcntrs(fd_pipe_dev_id(pipe), &num_perfcntrs);
464 if (num_perfcntrs == 0) {
465 PERFETTO_FATAL("No hw counters available");
466 return false;
467 }
468
469 assigned_counters.resize(num_perfcntrs);
470 assigned_counters.assign(assigned_counters.size(), 0);
471
472 switch (fd_dev_gen(dev_id)) {
473 case 6:
474 setup_a6xx_counters();
475 break;
476 default:
477 PERFETTO_FATAL("Unsupported GPU: a%03u", fd_dev_gpu_id(dev_id));
478 return false;
479 }
480
481 state.resize(next_countable_id);
482
483 for (const auto &countable : countables)
484 countable.resolve();
485
486 info = fd_dev_info_raw(dev_id);
487
488 io = fd_dt_find_io();
489 if (!io) {
490 PERFETTO_FATAL("Could not map GPU I/O space");
491 return false;
492 }
493
494 configure_counters(true, true);
495 collect_countables();
496
497 return true;
498 }
499
500 void
enable_counter(const uint32_t counter_id)501 FreedrenoDriver::enable_counter(const uint32_t counter_id)
502 {
503 enabled_counters.push_back(counters[counter_id]);
504 }
505
506 void
enable_all_counters()507 FreedrenoDriver::enable_all_counters()
508 {
509 enabled_counters.reserve(counters.size());
510 for (auto &counter : counters) {
511 enabled_counters.push_back(counter);
512 }
513 }
514
515 void
enable_perfcnt(const uint64_t)516 FreedrenoDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */)
517 {
518 }
519
520 bool
dump_perfcnt()521 FreedrenoDriver::dump_perfcnt()
522 {
523 if (has_suspend_count) {
524 uint64_t val;
525
526 fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val);
527
528 if (suspend_count != val) {
529 PERFETTO_ILOG("Device had suspended!");
530
531 suspend_count = val;
532
533 configure_counters(true, true);
534 collect_countables();
535
536 /* We aren't going to have anything sensible by comparing
537 * current values to values from prior to the suspend, so
538 * just skip this sampling period.
539 */
540 return false;
541 }
542 }
543
544 auto last_ts = last_dump_ts;
545
546 /* Capture the timestamp from the *start* of the sampling period: */
547 last_capture_ts = last_dump_ts;
548
549 collect_countables();
550
551 auto elapsed_time_ns = last_dump_ts - last_ts;
552
553 time = (float)elapsed_time_ns / 1000000000.0;
554
555 /* On older kernels that dont' support querying the suspend-
556 * count, just send configuration cmdstream regularly to keep
557 * the GPU alive and correctly configured for the countables
558 * we want
559 */
560 if (!has_suspend_count) {
561 configure_counters(false, false);
562 }
563
564 return true;
565 }
566
next()567 uint64_t FreedrenoDriver::next()
568 {
569 auto ret = last_capture_ts;
570 last_capture_ts = 0;
571 return ret;
572 }
573
disable_perfcnt()574 void FreedrenoDriver::disable_perfcnt()
575 {
576 /* There isn't really any disable, only reconfiguring which countables
577 * get muxed to which counters
578 */
579 }
580
581 /*
582 * Countable
583 */
584
585 FreedrenoDriver::Countable
countable(std::string name)586 FreedrenoDriver::countable(std::string name)
587 {
588 auto countable = Countable(this, name);
589 countables.emplace_back(countable);
590 return countable;
591 }
592
Countable(FreedrenoDriver * d,std::string name)593 FreedrenoDriver::Countable::Countable(FreedrenoDriver *d, std::string name)
594 : id {d->next_countable_id++}, d {d}, name {name}
595 {
596 }
597
598 /* Emit register writes on ring to configure counter/countable muxing: */
599 void
configure(struct fd_ringbuffer * ring,bool reset) const600 FreedrenoDriver::Countable::configure(struct fd_ringbuffer *ring, bool reset) const
601 {
602 const struct fd_perfcntr_countable *countable = d->state[id].countable;
603 const struct fd_perfcntr_counter *counter = d->state[id].counter;
604
605 OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
606
607 if (counter->enable && reset) {
608 OUT_PKT4(ring, counter->enable, 1);
609 OUT_RING(ring, 0);
610 }
611
612 if (counter->clear && reset) {
613 OUT_PKT4(ring, counter->clear, 1);
614 OUT_RING(ring, 1);
615
616 OUT_PKT4(ring, counter->clear, 1);
617 OUT_RING(ring, 0);
618 }
619
620 OUT_PKT4(ring, counter->select_reg, 1);
621 OUT_RING(ring, countable->selector);
622
623 if (counter->enable && reset) {
624 OUT_PKT4(ring, counter->enable, 1);
625 OUT_RING(ring, 1);
626 }
627 }
628
629 /* Collect current counter value and calculate delta since last sample: */
630 void
collect() const631 FreedrenoDriver::Countable::collect() const
632 {
633 const struct fd_perfcntr_counter *counter = d->state[id].counter;
634
635 d->state[id].last_value = d->state[id].value;
636
637 /* this is true on a5xx and later */
638 assert(counter->counter_reg_lo + 1 == counter->counter_reg_hi);
639 uint64_t *reg = (uint64_t *)((uint32_t *)d->io + counter->counter_reg_lo);
640
641 d->state[id].value = *reg;
642 }
643
644 /* Resolve the countable and assign next counter from it's group: */
645 void
resolve() const646 FreedrenoDriver::Countable::resolve() const
647 {
648 for (unsigned i = 0; i < d->num_perfcntrs; i++) {
649 const struct fd_perfcntr_group *g = &d->perfcntrs[i];
650 for (unsigned j = 0; j < g->num_countables; j++) {
651 const struct fd_perfcntr_countable *c = &g->countables[j];
652 if (name == c->name) {
653 d->state[id].countable = c;
654
655 /* Assign a counter from the same group: */
656 assert(d->assigned_counters[i] < g->num_counters);
657 d->state[id].counter = &g->counters[d->assigned_counters[i]++];
658
659 std::cout << "Countable: " << name << ", group=" << g->name <<
660 ", counter=" << d->assigned_counters[i] - 1 << "\n";
661
662 return;
663 }
664 }
665 }
666 unreachable("no such countable!");
667 }
668
669 uint64_t
get_value() const670 FreedrenoDriver::Countable::get_value() const
671 {
672 return d->state[id].value - d->state[id].last_value;
673 }
674
675 /*
676 * DerivedCounter
677 */
678
DerivedCounter(FreedrenoDriver * d,std::string name,Counter::Units units,std::function<int64_t ()> derive)679 FreedrenoDriver::DerivedCounter::DerivedCounter(FreedrenoDriver *d, std::string name,
680 Counter::Units units,
681 std::function<int64_t()> derive)
682 : Counter(d->next_counter_id++, name, 0)
683 {
684 std::cout << "DerivedCounter: " << name << ", id=" << id << "\n";
685 this->units = units;
686 set_getter([=](const Counter &c, const Driver &d) {
687 return derive();
688 }
689 );
690 }
691
692 FreedrenoDriver::DerivedCounter
counter(std::string name,Counter::Units units,std::function<int64_t ()> derive)693 FreedrenoDriver::counter(std::string name, Counter::Units units,
694 std::function<int64_t()> derive)
695 {
696 auto counter = DerivedCounter(this, name, units, derive);
697 counters.emplace_back(counter);
698 return counter;
699 }
700
701 uint32_t
gpu_clock_id() const702 FreedrenoDriver::gpu_clock_id() const
703 {
704 return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME;
705 }
706
707 uint64_t
gpu_timestamp() const708 FreedrenoDriver::gpu_timestamp() const
709 {
710 return perfetto::base::GetBootTimeNs().count();
711 }
712
713 bool
cpu_gpu_timestamp(uint64_t &,uint64_t &) const714 FreedrenoDriver::cpu_gpu_timestamp(uint64_t &, uint64_t &) const
715 {
716 /* Not supported */
717 return false;
718 }
719
720 } // namespace pps
721