1 /*
2 * Copyright © 2021 Google, Inc.
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "fd_pps_driver.h"
8
9 #include <cstring>
10 #include <iostream>
11 #include <perfetto.h>
12
13 #include "pps/pps.h"
14 #include "pps/pps_algorithm.h"
15
16 namespace pps
17 {
18
/* Divide a by b in floating point; a zero divisor yields 0 instead of
 * inf/NaN.
 */
double
safe_div(uint64_t a, uint64_t b)
{
   return (b != 0) ? a / static_cast<double>(b) : 0;
}
27
/* Express a as a percentage of b.
 *
 * Returns 0 when b is zero or when a exceeds b: bogus samples are
 * sometimes seen, and the timeline should never show more than 100%.
 */
float
percent(uint64_t a, uint64_t b)
{
   const bool usable = (b != 0) && (a <= b);
   if (!usable)
      return 0;

   const double ratio = a / static_cast<double>(b);
   return 100.f * ratio;
}
39
40 uint64_t
get_min_sampling_period_ns()41 FreedrenoDriver::get_min_sampling_period_ns()
42 {
43 return 100000;
44 }
45
/*
 TODO this seems like it would be largely the same for a5xx as well
 (ie. same countable names)..
 */
50 void
setup_a6xx_counters()51 FreedrenoDriver::setup_a6xx_counters()
52 {
53 /* TODO is there a reason to want more than one group? */
54 CounterGroup group = {};
55 group.name = "counters";
56 groups.clear();
57 counters.clear();
58 countables.clear();
59 enabled_counters.clear();
60 groups.emplace_back(std::move(group));
61
62 /*
63 * Create the countables that we'll be using.
64 */
65
66 auto PERF_CP_ALWAYS_COUNT = countable("PERF_CP_ALWAYS_COUNT");
67 auto PERF_CP_BUSY_CYCLES = countable("PERF_CP_BUSY_CYCLES");
68 auto PERF_RB_3D_PIXELS = countable("PERF_RB_3D_PIXELS");
69 auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES");
70 auto PERF_TP_L1_CACHELINE_REQUESTS = countable("PERF_TP_L1_CACHELINE_REQUESTS");
71
72 auto PERF_TP_OUTPUT_PIXELS = countable("PERF_TP_OUTPUT_PIXELS");
73 auto PERF_TP_OUTPUT_PIXELS_ANISO = countable("PERF_TP_OUTPUT_PIXELS_ANISO");
74 auto PERF_TP_OUTPUT_PIXELS_BILINEAR = countable("PERF_TP_OUTPUT_PIXELS_BILINEAR");
75 auto PERF_TP_OUTPUT_PIXELS_POINT = countable("PERF_TP_OUTPUT_PIXELS_POINT");
76 auto PERF_TP_OUTPUT_PIXELS_ZERO_LOD = countable("PERF_TP_OUTPUT_PIXELS_ZERO_LOD");
77
78 auto PERF_TSE_INPUT_PRIM = countable("PERF_TSE_INPUT_PRIM");
79 auto PERF_TSE_CLIPPED_PRIM = countable("PERF_TSE_CLIPPED_PRIM");
80 auto PERF_TSE_TRIVAL_REJ_PRIM = countable("PERF_TSE_TRIVAL_REJ_PRIM");
81 auto PERF_TSE_OUTPUT_VISIBLE_PRIM = countable("PERF_TSE_OUTPUT_VISIBLE_PRIM");
82
83 auto PERF_SP_BUSY_CYCLES = countable("PERF_SP_BUSY_CYCLES");
84 auto PERF_SP_ALU_WORKING_CYCLES = countable("PERF_SP_ALU_WORKING_CYCLES");
85 auto PERF_SP_EFU_WORKING_CYCLES = countable("PERF_SP_EFU_WORKING_CYCLES");
86 auto PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_EFU_INSTRUCTIONS");
87 auto PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS");
88 auto PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_TEX_INSTRUCTIONS");
89 auto PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_EFU_INSTRUCTIONS");
90 auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS");
91 auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS");
92 auto PERF_SP_STALL_CYCLES_TP = countable("PERF_SP_STALL_CYCLES_TP");
93 auto PERF_SP_ANY_EU_WORKING_FS_STAGE = countable("PERF_SP_ANY_EU_WORKING_FS_STAGE");
94 auto PERF_SP_ANY_EU_WORKING_VS_STAGE = countable("PERF_SP_ANY_EU_WORKING_VS_STAGE");
95 auto PERF_SP_ANY_EU_WORKING_CS_STAGE = countable("PERF_SP_ANY_EU_WORKING_CS_STAGE");
96
97 auto PERF_UCHE_STALL_CYCLES_ARBITER = countable("PERF_UCHE_STALL_CYCLES_ARBITER");
98 auto PERF_UCHE_VBIF_READ_BEATS_TP = countable("PERF_UCHE_VBIF_READ_BEATS_TP");
99 auto PERF_UCHE_VBIF_READ_BEATS_VFD = countable("PERF_UCHE_VBIF_READ_BEATS_VFD");
100 auto PERF_UCHE_VBIF_READ_BEATS_SP = countable("PERF_UCHE_VBIF_READ_BEATS_SP");
101 auto PERF_UCHE_READ_REQUESTS_TP = countable("PERF_UCHE_READ_REQUESTS_TP");
102
103 auto PERF_PC_STALL_CYCLES_VFD = countable("PERF_PC_STALL_CYCLES_VFD");
104 auto PERF_PC_VS_INVOCATIONS = countable("PERF_PC_VS_INVOCATIONS");
105 auto PERF_PC_VERTEX_HITS = countable("PERF_PC_VERTEX_HITS");
106
107 auto PERF_HLSQ_QUADS = countable("PERF_HLSQ_QUADS"); /* Quads (fragments / 4) produced */
108
109 auto PERF_CP_NUM_PREEMPTIONS = countable("PERF_CP_NUM_PREEMPTIONS");
110 auto PERF_CP_PREEMPTION_REACTION_DELAY = countable("PERF_CP_PREEMPTION_REACTION_DELAY");
111
112 /* TODO: resolve() tells there is no PERF_CMPDECMP_VBIF_READ_DATA */
113 // auto PERF_CMPDECMP_VBIF_READ_DATA = countable("PERF_CMPDECMP_VBIF_READ_DATA");
114
115 /*
116 * And then setup the derived counters that we are exporting to
117 * pps based on the captured countable values.
118 *
119 * We try to expose the same counters as blob:
120 * https://gpuinspector.dev/docs/gpu-counters/qualcomm
121 */
122
123 counter("GPU Frequency", Counter::Units::Hertz, [=]() {
124 return PERF_CP_ALWAYS_COUNT / time;
125 }
126 );
127
128 counter("GPU % Utilization", Counter::Units::Percent, [=]() {
129 return percent(PERF_CP_BUSY_CYCLES / time, max_freq);
130 }
131 );
132
133 counter("TP L1 Cache Misses", Counter::Units::None, [=]() {
134 return PERF_TP_L1_CACHELINE_MISSES / time;
135 }
136 );
137
138 counter("Shader Core Utilization", Counter::Units::Percent, [=]() {
139 return percent(PERF_SP_BUSY_CYCLES / time, max_freq * info->num_sp_cores);
140 }
141 );
142
143 /* TODO: verify */
144 counter("(?) % Texture Fetch Stall", Counter::Units::Percent, [=]() {
145 return percent(PERF_SP_STALL_CYCLES_TP / time, max_freq * info->num_sp_cores);
146 }
147 );
148
149 /* TODO: verify */
150 counter("(?) % Vertex Fetch Stall", Counter::Units::Percent, [=]() {
151 return percent(PERF_PC_STALL_CYCLES_VFD / time, max_freq * info->num_sp_cores);
152 }
153 );
154
155 counter("L1 Texture Cache Miss Per Pixel", Counter::Units::None, [=]() {
156 return safe_div(PERF_TP_L1_CACHELINE_MISSES, PERF_HLSQ_QUADS * 4);
157 }
158 );
159
160 counter("% Texture L1 Miss", Counter::Units::Percent, [=]() {
161 return percent(PERF_TP_L1_CACHELINE_MISSES, PERF_TP_L1_CACHELINE_REQUESTS);
162 }
163 );
164
165 counter("% Texture L2 Miss", Counter::Units::Percent, [=]() {
166 return percent(PERF_UCHE_VBIF_READ_BEATS_TP / 2, PERF_UCHE_READ_REQUESTS_TP);
167 }
168 );
169
170 /* TODO: verify */
171 counter("(?) % Stalled on System Memory", Counter::Units::Percent, [=]() {
172 return percent(PERF_UCHE_STALL_CYCLES_ARBITER / time, max_freq * info->num_sp_cores);
173 }
174 );
175
176 counter("Pre-clipped Polygons / Second", Counter::Units::None, [=]() {
177 return PERF_TSE_INPUT_PRIM * (1.f / time);
178 }
179 );
180
181 counter("% Prims Trivially Rejected", Counter::Units::Percent, [=]() {
182 return percent(PERF_TSE_TRIVAL_REJ_PRIM, PERF_TSE_INPUT_PRIM);
183 }
184 );
185
186 counter("% Prims Clipped", Counter::Units::Percent, [=]() {
187 return percent(PERF_TSE_CLIPPED_PRIM, PERF_TSE_INPUT_PRIM);
188 }
189 );
190
191 counter("Average Vertices / Polygon", Counter::Units::None, [=]() {
192 return PERF_PC_VS_INVOCATIONS / PERF_TSE_INPUT_PRIM;
193 }
194 );
195
196 counter("Reused Vertices / Second", Counter::Units::None, [=]() {
197 return PERF_PC_VERTEX_HITS * (1.f / time);
198 }
199 );
200
201 counter("Average Polygon Area", Counter::Units::None, [=]() {
202 return safe_div(PERF_HLSQ_QUADS * 4, PERF_TSE_OUTPUT_VISIBLE_PRIM);
203 }
204 );
205
206 /* TODO: find formula */
207 // counter("% Shaders Busy", Counter::Units::Percent, [=]() {
208 // return 100.0 * 0;
209 // }
210 // );
211
212 counter("Vertices Shaded / Second", Counter::Units::None, [=]() {
213 return PERF_PC_VS_INVOCATIONS * (1.f / time);
214 }
215 );
216
217 counter("Fragments Shaded / Second", Counter::Units::None, [=]() {
218 return PERF_HLSQ_QUADS * 4 * (1.f / time);
219 }
220 );
221
222 counter("Vertex Instructions / Second", Counter::Units::None, [=]() {
223 return (PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS +
224 PERF_SP_VS_STAGE_EFU_INSTRUCTIONS) * (1.f / time);
225 }
226 );
227
228 counter("Fragment Instructions / Second", Counter::Units::None, [=]() {
229 return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
230 PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2 +
231 PERF_SP_FS_STAGE_EFU_INSTRUCTIONS) * (1.f / time);
232 }
233 );
234
235 counter("Fragment ALU Instructions / Sec (Full)", Counter::Units::None, [=]() {
236 return PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS * (1.f / time);
237 }
238 );
239
240 counter("Fragment ALU Instructions / Sec (Half)", Counter::Units::None, [=]() {
241 return PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS * (1.f / time);
242 }
243 );
244
245 counter("Fragment EFU Instructions / Second", Counter::Units::None, [=]() {
246 return PERF_SP_FS_STAGE_EFU_INSTRUCTIONS * (1.f / time);
247 }
248 );
249
250 counter("Textures / Vertex", Counter::Units::None, [=]() {
251 return safe_div(PERF_SP_VS_STAGE_TEX_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
252 }
253 );
254
255 counter("Textures / Fragment", Counter::Units::None, [=]() {
256 return safe_div(PERF_TP_OUTPUT_PIXELS, PERF_HLSQ_QUADS * 4);
257 }
258 );
259
260 counter("ALU / Vertex", Counter::Units::None, [=]() {
261 return safe_div(PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
262 }
263 );
264
265 counter("EFU / Vertex", Counter::Units::None, [=]() {
266 return safe_div(PERF_SP_VS_STAGE_EFU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
267 }
268 );
269
270 counter("ALU / Fragment", Counter::Units::None, [=]() {
271 return safe_div(PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
272 PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2, PERF_HLSQ_QUADS);
273 }
274 );
275
276 counter("EFU / Fragment", Counter::Units::None, [=]() {
277 return safe_div(PERF_SP_FS_STAGE_EFU_INSTRUCTIONS, PERF_HLSQ_QUADS);
278 }
279 );
280
281 counter("% Time Shading Vertices", Counter::Units::Percent, [=]() {
282 return percent(PERF_SP_ANY_EU_WORKING_VS_STAGE,
283 (PERF_SP_ANY_EU_WORKING_VS_STAGE +
284 PERF_SP_ANY_EU_WORKING_FS_STAGE +
285 PERF_SP_ANY_EU_WORKING_CS_STAGE));
286 }
287 );
288
289 counter("% Time Shading Fragments", Counter::Units::Percent, [=]() {
290 return percent(PERF_SP_ANY_EU_WORKING_FS_STAGE,
291 (PERF_SP_ANY_EU_WORKING_VS_STAGE +
292 PERF_SP_ANY_EU_WORKING_FS_STAGE +
293 PERF_SP_ANY_EU_WORKING_CS_STAGE));
294 }
295 );
296
297 counter("% Time Compute", Counter::Units::Percent, [=]() {
298 return percent(PERF_SP_ANY_EU_WORKING_CS_STAGE,
299 (PERF_SP_ANY_EU_WORKING_VS_STAGE +
300 PERF_SP_ANY_EU_WORKING_FS_STAGE +
301 PERF_SP_ANY_EU_WORKING_CS_STAGE));
302 }
303 );
304
305 counter("% Shader ALU Capacity Utilized", Counter::Units::Percent, [=]() {
306 return percent((PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS +
307 PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
308 PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / 64,
309 PERF_SP_BUSY_CYCLES);
310 }
311 );
312
313 counter("% Time ALUs Working", Counter::Units::Percent, [=]() {
314 return percent(PERF_SP_ALU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES);
315 }
316 );
317
318 counter("% Time EFUs Working", Counter::Units::Percent, [=]() {
319 return percent(PERF_SP_EFU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES);
320 }
321 );
322
323 counter("% Anisotropic Filtered", Counter::Units::Percent, [=]() {
324 return percent(PERF_TP_OUTPUT_PIXELS_ANISO, PERF_TP_OUTPUT_PIXELS);
325 }
326 );
327
328 counter("% Linear Filtered", Counter::Units::Percent, [=]() {
329 return percent(PERF_TP_OUTPUT_PIXELS_BILINEAR, PERF_TP_OUTPUT_PIXELS);
330 }
331 );
332
333 counter("% Nearest Filtered", Counter::Units::Percent, [=]() {
334 return percent(PERF_TP_OUTPUT_PIXELS_POINT, PERF_TP_OUTPUT_PIXELS);
335 }
336 );
337
338 counter("% Non-Base Level Textures", Counter::Units::Percent, [=]() {
339 return percent(PERF_TP_OUTPUT_PIXELS_ZERO_LOD, PERF_TP_OUTPUT_PIXELS);
340 }
341 );
342
343 /* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=63 */
344 // counter("Read Total (Bytes/sec)", Counter::Units::Byte, [=]() {
345 // return * (1.f / time);
346 // }
347 // );
348
349 /* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=84 */
350 // counter("Write Total (Bytes/sec)", Counter::Units::Byte, [=]() {
351 // return * (1.f / time);
352 // }
353 // );
354
355 /* Cannot get PERF_CMPDECMP_VBIF_READ_DATA countable */
356 // counter("Texture Memory Read BW (Bytes/Second)", Counter::Units::Byte, [=]() {
357 // return (PERF_CMPDECMP_VBIF_READ_DATA + PERF_UCHE_VBIF_READ_BEATS_TP) * (1.f / time);
358 // }
359 // );
360
361 /* TODO: verify */
362 counter("(?) Vertex Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() {
363 return PERF_UCHE_VBIF_READ_BEATS_VFD * 32 * (1.f / time);
364 }
365 );
366
367 /* TODO: verify */
368 counter("SP Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() {
369 return PERF_UCHE_VBIF_READ_BEATS_SP * 32 * (1.f / time);
370 }
371 );
372
373 counter("Avg Bytes / Fragment", Counter::Units::Byte, [=]() {
374 return safe_div(PERF_UCHE_VBIF_READ_BEATS_TP * 32, PERF_HLSQ_QUADS * 4);
375 }
376 );
377
378 counter("Avg Bytes / Vertex", Counter::Units::Byte, [=]() {
379 return safe_div(PERF_UCHE_VBIF_READ_BEATS_VFD * 32, PERF_PC_VS_INVOCATIONS);
380 }
381 );
382
383 counter("Preemptions / second", Counter::Units::None, [=]() {
384 return PERF_CP_NUM_PREEMPTIONS * (1.f / time);
385 }
386 );
387
388 counter("Avg Preemption Delay", Counter::Units::None, [=]() {
389 return PERF_CP_PREEMPTION_REACTION_DELAY * (1.f / time);
390 }
391 );
392 }
393
394 /**
395 * Generate an submit the cmdstream to configure the counter/countable
396 * muxing
397 */
398 void
configure_counters(bool reset,bool wait)399 FreedrenoDriver::configure_counters(bool reset, bool wait)
400 {
401 struct fd_submit *submit = fd_submit_new(pipe);
402 enum fd_ringbuffer_flags flags =
403 (enum fd_ringbuffer_flags)(FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
404 struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000, flags);
405
406 for (auto countable : countables)
407 countable.configure(ring, reset);
408
409 struct fd_submit_fence fence = {};
410 util_queue_fence_init(&fence.ready);
411
412 fd_submit_flush(submit, -1, &fence);
413
414 util_queue_fence_wait(&fence.ready);
415
416 fd_ringbuffer_del(ring);
417 fd_submit_del(submit);
418
419 if (wait)
420 fd_pipe_wait(pipe, &fence.fence);
421 }
422
423 /**
424 * Read the current counter values and record the time.
425 */
426 void
collect_countables()427 FreedrenoDriver::collect_countables()
428 {
429 last_dump_ts = perfetto::base::GetBootTimeNs().count();
430
431 for (auto countable : countables)
432 countable.collect();
433 }
434
435 bool
init_perfcnt()436 FreedrenoDriver::init_perfcnt()
437 {
438 uint64_t val;
439
440 dev = fd_device_new(drm_device.fd);
441 pipe = fd_pipe_new(dev, FD_PIPE_3D);
442 dev_id = fd_pipe_dev_id(pipe);
443
444 if (fd_pipe_get_param(pipe, FD_MAX_FREQ, &val)) {
445 PERFETTO_FATAL("Could not get MAX_FREQ");
446 return false;
447 }
448 max_freq = val;
449
450 if (fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val)) {
451 PERFETTO_ILOG("Could not get SUSPEND_COUNT");
452 } else {
453 suspend_count = val;
454 has_suspend_count = true;
455 }
456
457 fd_pipe_set_param(pipe, FD_SYSPROF, 1);
458
459 perfcntrs = fd_perfcntrs(fd_pipe_dev_id(pipe), &num_perfcntrs);
460 if (num_perfcntrs == 0) {
461 PERFETTO_FATAL("No hw counters available");
462 return false;
463 }
464
465 assigned_counters.resize(num_perfcntrs);
466 assigned_counters.assign(assigned_counters.size(), 0);
467
468 switch (fd_dev_gen(dev_id)) {
469 case 6:
470 setup_a6xx_counters();
471 break;
472 default:
473 PERFETTO_FATAL("Unsupported GPU: a%03u", fd_dev_gpu_id(dev_id));
474 return false;
475 }
476
477 state.resize(next_countable_id);
478
479 for (auto countable : countables)
480 countable.resolve();
481
482 info = fd_dev_info(dev_id);
483
484 io = fd_dt_find_io();
485 if (!io) {
486 PERFETTO_FATAL("Could not map GPU I/O space");
487 return false;
488 }
489
490 configure_counters(true, true);
491 collect_countables();
492
493 return true;
494 }
495
496 void
enable_counter(const uint32_t counter_id)497 FreedrenoDriver::enable_counter(const uint32_t counter_id)
498 {
499 enabled_counters.push_back(counters[counter_id]);
500 }
501
502 void
enable_all_counters()503 FreedrenoDriver::enable_all_counters()
504 {
505 enabled_counters.reserve(counters.size());
506 for (auto &counter : counters) {
507 enabled_counters.push_back(counter);
508 }
509 }
510
511 void
enable_perfcnt(const uint64_t)512 FreedrenoDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */)
513 {
514 }
515
516 bool
dump_perfcnt()517 FreedrenoDriver::dump_perfcnt()
518 {
519 if (has_suspend_count) {
520 uint64_t val;
521
522 fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val);
523
524 if (suspend_count != val) {
525 PERFETTO_ILOG("Device had suspended!");
526
527 suspend_count = val;
528
529 configure_counters(true, true);
530 collect_countables();
531
532 /* We aren't going to have anything sensible by comparing
533 * current values to values from prior to the suspend, so
534 * just skip this sampling period.
535 */
536 return false;
537 }
538 }
539
540 auto last_ts = last_dump_ts;
541
542 /* Capture the timestamp from the *start* of the sampling period: */
543 last_capture_ts = last_dump_ts;
544
545 collect_countables();
546
547 auto elapsed_time_ns = last_dump_ts - last_ts;
548
549 time = (float)elapsed_time_ns / 1000000000.0;
550
551 /* On older kernels that dont' support querying the suspend-
552 * count, just send configuration cmdstream regularly to keep
553 * the GPU alive and correctly configured for the countables
554 * we want
555 */
556 if (!has_suspend_count) {
557 configure_counters(false, false);
558 }
559
560 return true;
561 }
562
next()563 uint64_t FreedrenoDriver::next()
564 {
565 auto ret = last_capture_ts;
566 last_capture_ts = 0;
567 return ret;
568 }
569
disable_perfcnt()570 void FreedrenoDriver::disable_perfcnt()
571 {
572 /* There isn't really any disable, only reconfiguring which countables
573 * get muxed to which counters
574 */
575 }
576
577 /*
578 * Countable
579 */
580
581 FreedrenoDriver::Countable
countable(std::string name)582 FreedrenoDriver::countable(std::string name)
583 {
584 auto countable = Countable(this, name);
585 countables.emplace_back(countable);
586 return countable;
587 }
588
Countable(FreedrenoDriver * d,std::string name)589 FreedrenoDriver::Countable::Countable(FreedrenoDriver *d, std::string name)
590 : id {d->next_countable_id++}, d {d}, name {name}
591 {
592 }
593
594 /* Emit register writes on ring to configure counter/countable muxing: */
595 void
configure(struct fd_ringbuffer * ring,bool reset)596 FreedrenoDriver::Countable::configure(struct fd_ringbuffer *ring, bool reset)
597 {
598 const struct fd_perfcntr_countable *countable = d->state[id].countable;
599 const struct fd_perfcntr_counter *counter = d->state[id].counter;
600
601 OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
602
603 if (counter->enable && reset) {
604 OUT_PKT4(ring, counter->enable, 1);
605 OUT_RING(ring, 0);
606 }
607
608 if (counter->clear && reset) {
609 OUT_PKT4(ring, counter->clear, 1);
610 OUT_RING(ring, 1);
611
612 OUT_PKT4(ring, counter->clear, 1);
613 OUT_RING(ring, 0);
614 }
615
616 OUT_PKT4(ring, counter->select_reg, 1);
617 OUT_RING(ring, countable->selector);
618
619 if (counter->enable && reset) {
620 OUT_PKT4(ring, counter->enable, 1);
621 OUT_RING(ring, 1);
622 }
623 }
624
625 /* Collect current counter value and calculate delta since last sample: */
626 void
collect()627 FreedrenoDriver::Countable::collect()
628 {
629 const struct fd_perfcntr_counter *counter = d->state[id].counter;
630
631 d->state[id].last_value = d->state[id].value;
632
633 uint32_t *reg_lo = (uint32_t *)d->io + counter->counter_reg_lo;
634 uint32_t *reg_hi = (uint32_t *)d->io + counter->counter_reg_hi;
635
636 uint32_t lo = *reg_lo;
637 uint32_t hi = *reg_hi;
638
639 d->state[id].value = lo | ((uint64_t)hi << 32);
640 }
641
642 /* Resolve the countable and assign next counter from it's group: */
643 void
resolve()644 FreedrenoDriver::Countable::resolve()
645 {
646 for (unsigned i = 0; i < d->num_perfcntrs; i++) {
647 const struct fd_perfcntr_group *g = &d->perfcntrs[i];
648 for (unsigned j = 0; j < g->num_countables; j++) {
649 const struct fd_perfcntr_countable *c = &g->countables[j];
650 if (name == c->name) {
651 d->state[id].countable = c;
652
653 /* Assign a counter from the same group: */
654 assert(d->assigned_counters[i] < g->num_counters);
655 d->state[id].counter = &g->counters[d->assigned_counters[i]++];
656
657 std::cout << "Countable: " << name << ", group=" << g->name <<
658 ", counter=" << d->assigned_counters[i] - 1 << "\n";
659
660 return;
661 }
662 }
663 }
664 unreachable("no such countable!");
665 }
666
667 uint64_t
get_value() const668 FreedrenoDriver::Countable::get_value() const
669 {
670 return d->state[id].value - d->state[id].last_value;
671 }
672
673 /*
674 * DerivedCounter
675 */
676
DerivedCounter(FreedrenoDriver * d,std::string name,Counter::Units units,std::function<int64_t ()> derive)677 FreedrenoDriver::DerivedCounter::DerivedCounter(FreedrenoDriver *d, std::string name,
678 Counter::Units units,
679 std::function<int64_t()> derive)
680 : Counter(d->next_counter_id++, name, 0)
681 {
682 std::cout << "DerivedCounter: " << name << ", id=" << id << "\n";
683 this->units = units;
684 set_getter([=](const Counter &c, const Driver &d) {
685 return derive();
686 }
687 );
688 }
689
690 FreedrenoDriver::DerivedCounter
counter(std::string name,Counter::Units units,std::function<int64_t ()> derive)691 FreedrenoDriver::counter(std::string name, Counter::Units units,
692 std::function<int64_t()> derive)
693 {
694 auto counter = DerivedCounter(this, name, units, derive);
695 counters.emplace_back(counter);
696 return counter;
697 }
698
699 uint32_t
gpu_clock_id() const700 FreedrenoDriver::gpu_clock_id() const
701 {
702 return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME;
703 }
704
705 uint64_t
gpu_timestamp() const706 FreedrenoDriver::gpu_timestamp() const
707 {
708 return perfetto::base::GetBootTimeNs().count();
709 }
710
711 } // namespace pps
712