• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Google, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "fd_pps_driver.h"
8 
9 #include <cstring>
10 #include <iostream>
11 #include <perfetto.h>
12 
13 #include "pps/pps.h"
14 #include "pps/pps_algorithm.h"
15 
16 namespace pps
17 {
18 
/* Floating-point quotient a / b that yields 0 when the divisor is zero. */
double
safe_div(uint64_t a, uint64_t b)
{
   return b ? static_cast<double>(a) / static_cast<double>(b) : 0.0;
}
27 
/* Express a as a percentage of b.
 *
 * Sometimes we get bogus values, and the timeline should not show
 * anything higher than 100%, so both a zero denominator and a > b
 * are reported as 0.
 */
float
percent(uint64_t a, uint64_t b)
{
   const bool usable = (b != 0) && (a <= b);
   if (!usable)
      return 0;

   const double ratio = static_cast<double>(a) / static_cast<double>(b);
   return static_cast<float>(100.f * ratio);
}
39 
40 bool
is_dump_perfcnt_preemptible() const41 FreedrenoDriver::is_dump_perfcnt_preemptible() const
42 {
43    return false;
44 }
45 
46 uint64_t
get_min_sampling_period_ns()47 FreedrenoDriver::get_min_sampling_period_ns()
48 {
49    return 100000;
50 }
51 
52 /*
53 TODO this sees like it would be largely the same for a5xx as well
54 (ie. same countable names)..
55  */
56 void
setup_a6xx_counters()57 FreedrenoDriver::setup_a6xx_counters()
58 {
59    /* TODO is there a reason to want more than one group? */
60    CounterGroup group = {};
61    group.name = "counters";
62    groups.clear();
63    counters.clear();
64    countables.clear();
65    enabled_counters.clear();
66    groups.emplace_back(std::move(group));
67 
68    /*
69     * Create the countables that we'll be using.
70     */
71 
72    auto PERF_CP_ALWAYS_COUNT = countable("PERF_CP_ALWAYS_COUNT");
73    auto PERF_CP_BUSY_CYCLES  = countable("PERF_CP_BUSY_CYCLES");
74    auto PERF_RB_3D_PIXELS    = countable("PERF_RB_3D_PIXELS");
75    auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES");
76    auto PERF_TP_L1_CACHELINE_REQUESTS = countable("PERF_TP_L1_CACHELINE_REQUESTS");
77 
78    auto PERF_TP_OUTPUT_PIXELS  = countable("PERF_TP_OUTPUT_PIXELS");
79    auto PERF_TP_OUTPUT_PIXELS_ANISO  = countable("PERF_TP_OUTPUT_PIXELS_ANISO");
80    auto PERF_TP_OUTPUT_PIXELS_BILINEAR = countable("PERF_TP_OUTPUT_PIXELS_BILINEAR");
81    auto PERF_TP_OUTPUT_PIXELS_POINT = countable("PERF_TP_OUTPUT_PIXELS_POINT");
82    auto PERF_TP_OUTPUT_PIXELS_ZERO_LOD = countable("PERF_TP_OUTPUT_PIXELS_ZERO_LOD");
83 
84    auto PERF_TSE_INPUT_PRIM  = countable("PERF_TSE_INPUT_PRIM");
85    auto PERF_TSE_CLIPPED_PRIM  = countable("PERF_TSE_CLIPPED_PRIM");
86    auto PERF_TSE_TRIVAL_REJ_PRIM  = countable("PERF_TSE_TRIVAL_REJ_PRIM");
87    auto PERF_TSE_OUTPUT_VISIBLE_PRIM = countable("PERF_TSE_OUTPUT_VISIBLE_PRIM");
88 
89    auto PERF_SP_BUSY_CYCLES  = countable("PERF_SP_BUSY_CYCLES");
90    auto PERF_SP_ALU_WORKING_CYCLES = countable("PERF_SP_ALU_WORKING_CYCLES");
91    auto PERF_SP_EFU_WORKING_CYCLES = countable("PERF_SP_EFU_WORKING_CYCLES");
92    auto PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_EFU_INSTRUCTIONS");
93    auto PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS");
94    auto PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_TEX_INSTRUCTIONS");
95    auto PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_EFU_INSTRUCTIONS");
96    auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS");
97    auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS");
98    auto PERF_SP_STALL_CYCLES_TP = countable("PERF_SP_STALL_CYCLES_TP");
99    auto PERF_SP_ANY_EU_WORKING_FS_STAGE = countable("PERF_SP_ANY_EU_WORKING_FS_STAGE");
100    auto PERF_SP_ANY_EU_WORKING_VS_STAGE = countable("PERF_SP_ANY_EU_WORKING_VS_STAGE");
101    auto PERF_SP_ANY_EU_WORKING_CS_STAGE = countable("PERF_SP_ANY_EU_WORKING_CS_STAGE");
102 
103    auto PERF_UCHE_STALL_CYCLES_ARBITER = countable("PERF_UCHE_STALL_CYCLES_ARBITER");
104    auto PERF_UCHE_VBIF_READ_BEATS_TP = countable("PERF_UCHE_VBIF_READ_BEATS_TP");
105    auto PERF_UCHE_VBIF_READ_BEATS_VFD = countable("PERF_UCHE_VBIF_READ_BEATS_VFD");
106    auto PERF_UCHE_VBIF_READ_BEATS_SP = countable("PERF_UCHE_VBIF_READ_BEATS_SP");
107    auto PERF_UCHE_READ_REQUESTS_TP = countable("PERF_UCHE_READ_REQUESTS_TP");
108 
109    auto PERF_PC_STALL_CYCLES_VFD = countable("PERF_PC_STALL_CYCLES_VFD");
110    auto PERF_PC_VS_INVOCATIONS = countable("PERF_PC_VS_INVOCATIONS");
111    auto PERF_PC_VERTEX_HITS = countable("PERF_PC_VERTEX_HITS");
112 
113    auto PERF_HLSQ_QUADS = countable("PERF_HLSQ_QUADS"); /* Quads (fragments / 4) produced */
114 
115    auto PERF_CP_NUM_PREEMPTIONS = countable("PERF_CP_NUM_PREEMPTIONS");
116    auto PERF_CP_PREEMPTION_REACTION_DELAY = countable("PERF_CP_PREEMPTION_REACTION_DELAY");
117 
118    /* TODO: resolve() tells there is no PERF_CMPDECMP_VBIF_READ_DATA */
119    // auto PERF_CMPDECMP_VBIF_READ_DATA = countable("PERF_CMPDECMP_VBIF_READ_DATA");
120 
121    /*
122     * And then setup the derived counters that we are exporting to
123     * pps based on the captured countable values.
124     *
125     * We try to expose the same counters as blob:
126     * https://gpuinspector.dev/docs/gpu-counters/qualcomm
127     */
128 
129    counter("GPU Frequency", Counter::Units::Hertz, [=]() {
130          return PERF_CP_ALWAYS_COUNT / time;
131       }
132    );
133 
134    counter("GPU % Utilization", Counter::Units::Percent, [=]() {
135          return percent(PERF_CP_BUSY_CYCLES / time, max_freq);
136       }
137    );
138 
139    counter("TP L1 Cache Misses", Counter::Units::None, [=]() {
140          return PERF_TP_L1_CACHELINE_MISSES / time;
141       }
142    );
143 
144    counter("Shader Core Utilization", Counter::Units::Percent, [=]() {
145          return percent(PERF_SP_BUSY_CYCLES / time, max_freq * info->num_sp_cores);
146       }
147    );
148 
149    /* TODO: verify */
150    counter("(?) % Texture Fetch Stall", Counter::Units::Percent, [=]() {
151          return percent(PERF_SP_STALL_CYCLES_TP / time, max_freq * info->num_sp_cores);
152       }
153    );
154 
155    /* TODO: verify */
156    counter("(?) % Vertex Fetch Stall", Counter::Units::Percent, [=]() {
157          return percent(PERF_PC_STALL_CYCLES_VFD / time, max_freq * info->num_sp_cores);
158       }
159    );
160 
161    counter("L1 Texture Cache Miss Per Pixel", Counter::Units::None, [=]() {
162          return safe_div(PERF_TP_L1_CACHELINE_MISSES, PERF_HLSQ_QUADS * 4);
163       }
164    );
165 
166    counter("% Texture L1 Miss", Counter::Units::Percent, [=]() {
167          return percent(PERF_TP_L1_CACHELINE_MISSES, PERF_TP_L1_CACHELINE_REQUESTS);
168       }
169    );
170 
171    counter("% Texture L2 Miss", Counter::Units::Percent, [=]() {
172          return percent(PERF_UCHE_VBIF_READ_BEATS_TP / 2, PERF_UCHE_READ_REQUESTS_TP);
173       }
174    );
175 
176    /* TODO: verify */
177    counter("(?) % Stalled on System Memory", Counter::Units::Percent, [=]() {
178          return percent(PERF_UCHE_STALL_CYCLES_ARBITER / time, max_freq * info->num_sp_cores);
179       }
180    );
181 
182    counter("Pre-clipped Polygons / Second", Counter::Units::None, [=]() {
183          return PERF_TSE_INPUT_PRIM * (1.f / time);
184       }
185    );
186 
187    counter("% Prims Trivially Rejected", Counter::Units::Percent, [=]() {
188          return percent(PERF_TSE_TRIVAL_REJ_PRIM, PERF_TSE_INPUT_PRIM);
189       }
190    );
191 
192    counter("% Prims Clipped", Counter::Units::Percent, [=]() {
193          return percent(PERF_TSE_CLIPPED_PRIM, PERF_TSE_INPUT_PRIM);
194       }
195    );
196 
197    counter("Average Vertices / Polygon", Counter::Units::None, [=]() {
198          return PERF_PC_VS_INVOCATIONS / PERF_TSE_INPUT_PRIM;
199       }
200    );
201 
202    counter("Reused Vertices / Second", Counter::Units::None, [=]() {
203          return PERF_PC_VERTEX_HITS * (1.f / time);
204       }
205    );
206 
207    counter("Average Polygon Area", Counter::Units::None, [=]() {
208          return safe_div(PERF_HLSQ_QUADS * 4, PERF_TSE_OUTPUT_VISIBLE_PRIM);
209       }
210    );
211 
212    /* TODO: find formula */
213    // counter("% Shaders Busy", Counter::Units::Percent, [=]() {
214    //       return 100.0 * 0;
215    //    }
216    // );
217 
218    counter("Vertices Shaded / Second", Counter::Units::None, [=]() {
219          return PERF_PC_VS_INVOCATIONS * (1.f / time);
220       }
221    );
222 
223    counter("Fragments Shaded / Second", Counter::Units::None, [=]() {
224          return PERF_HLSQ_QUADS * 4 * (1.f / time);
225       }
226    );
227 
228    counter("Vertex Instructions / Second", Counter::Units::None, [=]() {
229          return (PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS +
230                  PERF_SP_VS_STAGE_EFU_INSTRUCTIONS) * (1.f / time);
231       }
232    );
233 
234    counter("Fragment Instructions / Second", Counter::Units::None, [=]() {
235          return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
236                  PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2 +
237                  PERF_SP_FS_STAGE_EFU_INSTRUCTIONS) * (1.f / time);
238       }
239    );
240 
241    counter("Fragment ALU Instructions / Sec (Full)", Counter::Units::None, [=]() {
242          return PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS * (1.f / time);
243       }
244    );
245 
246    counter("Fragment ALU Instructions / Sec (Half)", Counter::Units::None, [=]() {
247          return PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS * (1.f / time);
248       }
249    );
250 
251    counter("Fragment EFU Instructions / Second", Counter::Units::None, [=]() {
252          return PERF_SP_FS_STAGE_EFU_INSTRUCTIONS * (1.f / time);
253       }
254    );
255 
256    counter("Textures / Vertex", Counter::Units::None, [=]() {
257          return safe_div(PERF_SP_VS_STAGE_TEX_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
258       }
259    );
260 
261    counter("Textures / Fragment", Counter::Units::None, [=]() {
262          return safe_div(PERF_TP_OUTPUT_PIXELS, PERF_HLSQ_QUADS * 4);
263       }
264    );
265 
266    counter("ALU / Vertex", Counter::Units::None, [=]() {
267          return safe_div(PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
268       }
269    );
270 
271    counter("EFU / Vertex", Counter::Units::None, [=]() {
272          return safe_div(PERF_SP_VS_STAGE_EFU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
273       }
274    );
275 
276    counter("ALU / Fragment", Counter::Units::None, [=]() {
277          return safe_div(PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
278                          PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2, PERF_HLSQ_QUADS);
279       }
280    );
281 
282    counter("EFU / Fragment", Counter::Units::None, [=]() {
283          return safe_div(PERF_SP_FS_STAGE_EFU_INSTRUCTIONS, PERF_HLSQ_QUADS);
284       }
285    );
286 
287    counter("% Time Shading Vertices", Counter::Units::Percent, [=]() {
288          return percent(PERF_SP_ANY_EU_WORKING_VS_STAGE,
289                         (PERF_SP_ANY_EU_WORKING_VS_STAGE +
290                          PERF_SP_ANY_EU_WORKING_FS_STAGE +
291                          PERF_SP_ANY_EU_WORKING_CS_STAGE));
292       }
293    );
294 
295    counter("% Time Shading Fragments", Counter::Units::Percent, [=]() {
296          return percent(PERF_SP_ANY_EU_WORKING_FS_STAGE,
297                         (PERF_SP_ANY_EU_WORKING_VS_STAGE +
298                          PERF_SP_ANY_EU_WORKING_FS_STAGE +
299                          PERF_SP_ANY_EU_WORKING_CS_STAGE));
300       }
301    );
302 
303    counter("% Time Compute", Counter::Units::Percent, [=]() {
304          return percent(PERF_SP_ANY_EU_WORKING_CS_STAGE,
305                         (PERF_SP_ANY_EU_WORKING_VS_STAGE +
306                          PERF_SP_ANY_EU_WORKING_FS_STAGE +
307                          PERF_SP_ANY_EU_WORKING_CS_STAGE));
308       }
309    );
310 
311    counter("% Shader ALU Capacity Utilized", Counter::Units::Percent, [=]() {
312          return percent((PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS +
313                          PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
314                          PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / 64,
315                         PERF_SP_BUSY_CYCLES);
316       }
317    );
318 
319    counter("% Time ALUs Working", Counter::Units::Percent, [=]() {
320          return percent(PERF_SP_ALU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES);
321       }
322    );
323 
324    counter("% Time EFUs Working", Counter::Units::Percent, [=]() {
325          return percent(PERF_SP_EFU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES);
326       }
327    );
328 
329    counter("% Anisotropic Filtered", Counter::Units::Percent, [=]() {
330          return percent(PERF_TP_OUTPUT_PIXELS_ANISO, PERF_TP_OUTPUT_PIXELS);
331       }
332    );
333 
334    counter("% Linear Filtered", Counter::Units::Percent, [=]() {
335          return percent(PERF_TP_OUTPUT_PIXELS_BILINEAR, PERF_TP_OUTPUT_PIXELS);
336       }
337    );
338 
339    counter("% Nearest Filtered", Counter::Units::Percent, [=]() {
340          return percent(PERF_TP_OUTPUT_PIXELS_POINT, PERF_TP_OUTPUT_PIXELS);
341       }
342    );
343 
344    counter("% Non-Base Level Textures", Counter::Units::Percent, [=]() {
345          return percent(PERF_TP_OUTPUT_PIXELS_ZERO_LOD, PERF_TP_OUTPUT_PIXELS);
346       }
347    );
348 
349    /* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=63 */
350    // counter("Read Total (Bytes/sec)", Counter::Units::Byte, [=]() {
351    //       return  * (1.f / time);
352    //    }
353    // );
354 
355    /* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=84 */
356    // counter("Write Total (Bytes/sec)", Counter::Units::Byte, [=]() {
357    //       return  * (1.f / time);
358    //    }
359    // );
360 
361    /* Cannot get PERF_CMPDECMP_VBIF_READ_DATA countable */
362    // counter("Texture Memory Read BW (Bytes/Second)", Counter::Units::Byte, [=]() {
363    //       return (PERF_CMPDECMP_VBIF_READ_DATA + PERF_UCHE_VBIF_READ_BEATS_TP) * (1.f / time);
364    //    }
365    // );
366 
367    /* TODO: verify */
368    counter("(?) Vertex Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() {
369          return PERF_UCHE_VBIF_READ_BEATS_VFD * 32 * (1.f / time);
370       }
371    );
372 
373    /* TODO: verify */
374    counter("SP Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() {
375          return PERF_UCHE_VBIF_READ_BEATS_SP * 32 * (1.f / time);
376       }
377    );
378 
379    counter("Avg Bytes / Fragment", Counter::Units::Byte, [=]() {
380          return safe_div(PERF_UCHE_VBIF_READ_BEATS_TP * 32, PERF_HLSQ_QUADS * 4);
381       }
382    );
383 
384    counter("Avg Bytes / Vertex", Counter::Units::Byte, [=]() {
385          return safe_div(PERF_UCHE_VBIF_READ_BEATS_VFD * 32, PERF_PC_VS_INVOCATIONS);
386       }
387    );
388 
389    counter("Preemptions / second", Counter::Units::None, [=]() {
390          return PERF_CP_NUM_PREEMPTIONS * (1.f / time);
391       }
392    );
393 
394    counter("Avg Preemption Delay", Counter::Units::None, [=]() {
395          return PERF_CP_PREEMPTION_REACTION_DELAY * (1.f / time);
396       }
397    );
398 }
399 
400 /**
401  * Generate an submit the cmdstream to configure the counter/countable
402  * muxing
403  */
404 void
configure_counters(bool reset,bool wait)405 FreedrenoDriver::configure_counters(bool reset, bool wait)
406 {
407    struct fd_submit *submit = fd_submit_new(pipe);
408    enum fd_ringbuffer_flags flags =
409       (enum fd_ringbuffer_flags)(FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
410    struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000, flags);
411 
412    for (const auto &countable : countables)
413       countable.configure(ring, reset);
414 
415    struct fd_fence *fence = fd_submit_flush(submit, -1, false);
416 
417    fd_fence_flush(fence);
418    fd_fence_del(fence);
419 
420    fd_ringbuffer_del(ring);
421    fd_submit_del(submit);
422 
423    if (wait)
424       fd_pipe_wait(pipe, fence);
425 }
426 
427 /**
428  * Read the current counter values and record the time.
429  */
430 void
collect_countables()431 FreedrenoDriver::collect_countables()
432 {
433    last_dump_ts = perfetto::base::GetBootTimeNs().count();
434 
435    for (const auto &countable : countables)
436       countable.collect();
437 }
438 
439 bool
init_perfcnt()440 FreedrenoDriver::init_perfcnt()
441 {
442    uint64_t val;
443 
444    dev = fd_device_new(drm_device.fd);
445    pipe = fd_pipe_new2(dev, FD_PIPE_3D, 0);
446    dev_id = fd_pipe_dev_id(pipe);
447 
448    if (fd_pipe_get_param(pipe, FD_MAX_FREQ, &val)) {
449       PERFETTO_FATAL("Could not get MAX_FREQ");
450       return false;
451    }
452    max_freq = val;
453 
454    if (fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val)) {
455       PERFETTO_ILOG("Could not get SUSPEND_COUNT");
456    } else {
457       suspend_count = val;
458       has_suspend_count = true;
459    }
460 
461    fd_pipe_set_param(pipe, FD_SYSPROF, 1);
462 
463    perfcntrs = fd_perfcntrs(fd_pipe_dev_id(pipe), &num_perfcntrs);
464    if (num_perfcntrs == 0) {
465       PERFETTO_FATAL("No hw counters available");
466       return false;
467    }
468 
469    assigned_counters.resize(num_perfcntrs);
470    assigned_counters.assign(assigned_counters.size(), 0);
471 
472    switch (fd_dev_gen(dev_id)) {
473    case 6:
474       setup_a6xx_counters();
475       break;
476    default:
477       PERFETTO_FATAL("Unsupported GPU: a%03u", fd_dev_gpu_id(dev_id));
478       return false;
479    }
480 
481    state.resize(next_countable_id);
482 
483    for (const auto &countable : countables)
484       countable.resolve();
485 
486    info = fd_dev_info_raw(dev_id);
487 
488    io = fd_dt_find_io();
489    if (!io) {
490       PERFETTO_FATAL("Could not map GPU I/O space");
491       return false;
492    }
493 
494    configure_counters(true, true);
495    collect_countables();
496 
497    return true;
498 }
499 
500 void
enable_counter(const uint32_t counter_id)501 FreedrenoDriver::enable_counter(const uint32_t counter_id)
502 {
503    enabled_counters.push_back(counters[counter_id]);
504 }
505 
506 void
enable_all_counters()507 FreedrenoDriver::enable_all_counters()
508 {
509    enabled_counters.reserve(counters.size());
510    for (auto &counter : counters) {
511       enabled_counters.push_back(counter);
512    }
513 }
514 
515 void
enable_perfcnt(const uint64_t)516 FreedrenoDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */)
517 {
518 }
519 
520 bool
dump_perfcnt()521 FreedrenoDriver::dump_perfcnt()
522 {
523    if (has_suspend_count) {
524       uint64_t val;
525 
526       fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val);
527 
528       if (suspend_count != val) {
529          PERFETTO_ILOG("Device had suspended!");
530 
531          suspend_count = val;
532 
533          configure_counters(true, true);
534          collect_countables();
535 
536          /* We aren't going to have anything sensible by comparing
537           * current values to values from prior to the suspend, so
538           * just skip this sampling period.
539           */
540          return false;
541       }
542    }
543 
544    auto last_ts = last_dump_ts;
545 
546    /* Capture the timestamp from the *start* of the sampling period: */
547    last_capture_ts = last_dump_ts;
548 
549    collect_countables();
550 
551    auto elapsed_time_ns = last_dump_ts - last_ts;
552 
553    time = (float)elapsed_time_ns / 1000000000.0;
554 
555    /* On older kernels that dont' support querying the suspend-
556     * count, just send configuration cmdstream regularly to keep
557     * the GPU alive and correctly configured for the countables
558     * we want
559     */
560    if (!has_suspend_count) {
561       configure_counters(false, false);
562    }
563 
564    return true;
565 }
566 
next()567 uint64_t FreedrenoDriver::next()
568 {
569    auto ret = last_capture_ts;
570    last_capture_ts = 0;
571    return ret;
572 }
573 
disable_perfcnt()574 void FreedrenoDriver::disable_perfcnt()
575 {
576    /* There isn't really any disable, only reconfiguring which countables
577     * get muxed to which counters
578     */
579 }
580 
581 /*
582  * Countable
583  */
584 
585 FreedrenoDriver::Countable
countable(std::string name)586 FreedrenoDriver::countable(std::string name)
587 {
588    auto countable = Countable(this, name);
589    countables.emplace_back(countable);
590    return countable;
591 }
592 
Countable(FreedrenoDriver * d,std::string name)593 FreedrenoDriver::Countable::Countable(FreedrenoDriver *d, std::string name)
594    : id {d->next_countable_id++}, d {d}, name {name}
595 {
596 }
597 
598 /* Emit register writes on ring to configure counter/countable muxing: */
599 void
configure(struct fd_ringbuffer * ring,bool reset) const600 FreedrenoDriver::Countable::configure(struct fd_ringbuffer *ring, bool reset) const
601 {
602    const struct fd_perfcntr_countable *countable = d->state[id].countable;
603    const struct fd_perfcntr_counter   *counter   = d->state[id].counter;
604 
605    OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
606 
607    if (counter->enable && reset) {
608       OUT_PKT4(ring, counter->enable, 1);
609       OUT_RING(ring, 0);
610    }
611 
612    if (counter->clear && reset) {
613       OUT_PKT4(ring, counter->clear, 1);
614       OUT_RING(ring, 1);
615 
616       OUT_PKT4(ring, counter->clear, 1);
617       OUT_RING(ring, 0);
618    }
619 
620    OUT_PKT4(ring, counter->select_reg, 1);
621    OUT_RING(ring, countable->selector);
622 
623    if (counter->enable && reset) {
624       OUT_PKT4(ring, counter->enable, 1);
625       OUT_RING(ring, 1);
626    }
627 }
628 
629 /* Collect current counter value and calculate delta since last sample: */
630 void
collect() const631 FreedrenoDriver::Countable::collect() const
632 {
633    const struct fd_perfcntr_counter *counter = d->state[id].counter;
634 
635    d->state[id].last_value = d->state[id].value;
636 
637    /* this is true on a5xx and later */
638    assert(counter->counter_reg_lo + 1 == counter->counter_reg_hi);
639    uint64_t *reg = (uint64_t *)((uint32_t *)d->io + counter->counter_reg_lo);
640 
641    d->state[id].value = *reg;
642 }
643 
644 /* Resolve the countable and assign next counter from it's group: */
645 void
resolve() const646 FreedrenoDriver::Countable::resolve() const
647 {
648    for (unsigned i = 0; i < d->num_perfcntrs; i++) {
649       const struct fd_perfcntr_group *g = &d->perfcntrs[i];
650       for (unsigned j = 0; j < g->num_countables; j++) {
651          const struct fd_perfcntr_countable *c = &g->countables[j];
652          if (name == c->name) {
653             d->state[id].countable = c;
654 
655             /* Assign a counter from the same group: */
656             assert(d->assigned_counters[i] < g->num_counters);
657             d->state[id].counter = &g->counters[d->assigned_counters[i]++];
658 
659             std::cout << "Countable: " << name << ", group=" << g->name <<
660                   ", counter=" << d->assigned_counters[i] - 1 << "\n";
661 
662             return;
663          }
664       }
665    }
666    unreachable("no such countable!");
667 }
668 
669 uint64_t
get_value() const670 FreedrenoDriver::Countable::get_value() const
671 {
672    return d->state[id].value - d->state[id].last_value;
673 }
674 
675 /*
676  * DerivedCounter
677  */
678 
DerivedCounter(FreedrenoDriver * d,std::string name,Counter::Units units,std::function<int64_t ()> derive)679 FreedrenoDriver::DerivedCounter::DerivedCounter(FreedrenoDriver *d, std::string name,
680                                                 Counter::Units units,
681                                                 std::function<int64_t()> derive)
682    : Counter(d->next_counter_id++, name, 0)
683 {
684    std::cout << "DerivedCounter: " << name << ", id=" << id << "\n";
685    this->units = units;
686    set_getter([=](const Counter &c, const Driver &d) {
687          return derive();
688       }
689    );
690 }
691 
692 FreedrenoDriver::DerivedCounter
counter(std::string name,Counter::Units units,std::function<int64_t ()> derive)693 FreedrenoDriver::counter(std::string name, Counter::Units units,
694                          std::function<int64_t()> derive)
695 {
696    auto counter = DerivedCounter(this, name, units, derive);
697    counters.emplace_back(counter);
698    return counter;
699 }
700 
701 uint32_t
gpu_clock_id() const702 FreedrenoDriver::gpu_clock_id() const
703 {
704    return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME;
705 }
706 
707 uint64_t
gpu_timestamp() const708 FreedrenoDriver::gpu_timestamp() const
709 {
710    return perfetto::base::GetBootTimeNs().count();
711 }
712 
713 bool
cpu_gpu_timestamp(uint64_t &,uint64_t &) const714 FreedrenoDriver::cpu_gpu_timestamp(uint64_t &, uint64_t &) const
715 {
716    /* Not supported */
717    return false;
718 }
719 
720 } // namespace pps
721