• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Google, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "fd_pps_driver.h"
8 
9 #include <cstring>
10 #include <iostream>
11 #include <perfetto.h>
12 
13 #include "pps/pps.h"
14 #include "pps/pps_algorithm.h"
15 
16 namespace pps
17 {
18 
/* Divide a by b in double precision; a zero divisor yields 0 rather
 * than a division by zero.
 */
double
safe_div(uint64_t a, uint64_t b)
{
   return b ? static_cast<double>(a) / static_cast<double>(b) : 0.0;
}
27 
/* Express a as a percentage of b.  Returns 0 when b is zero or when a
 * exceeds b.
 */
float
percent(uint64_t a, uint64_t b)
{
   /* Bogus samples do show up from time to time; report them as 0 so
    * the timeline never displays values above 100%.
    */
   const bool usable = (b != 0) && (a <= b);
   if (!usable)
      return 0;

   const double ratio = a / static_cast<double>(b);
   return 100.f * ratio;
}
39 
40 uint64_t
get_min_sampling_period_ns()41 FreedrenoDriver::get_min_sampling_period_ns()
42 {
43    return 100000;
44 }
45 
46 /*
47 TODO this sees like it would be largely the same for a5xx as well
48 (ie. same countable names)..
49  */
50 void
setup_a6xx_counters()51 FreedrenoDriver::setup_a6xx_counters()
52 {
53    /* TODO is there a reason to want more than one group? */
54    CounterGroup group = {};
55    group.name = "counters";
56    groups.clear();
57    counters.clear();
58    countables.clear();
59    enabled_counters.clear();
60    groups.emplace_back(std::move(group));
61 
62    /*
63     * Create the countables that we'll be using.
64     */
65 
66    auto PERF_CP_ALWAYS_COUNT = countable("PERF_CP_ALWAYS_COUNT");
67    auto PERF_CP_BUSY_CYCLES  = countable("PERF_CP_BUSY_CYCLES");
68    auto PERF_RB_3D_PIXELS    = countable("PERF_RB_3D_PIXELS");
69    auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES");
70    auto PERF_TP_L1_CACHELINE_REQUESTS = countable("PERF_TP_L1_CACHELINE_REQUESTS");
71 
72    auto PERF_TP_OUTPUT_PIXELS  = countable("PERF_TP_OUTPUT_PIXELS");
73    auto PERF_TP_OUTPUT_PIXELS_ANISO  = countable("PERF_TP_OUTPUT_PIXELS_ANISO");
74    auto PERF_TP_OUTPUT_PIXELS_BILINEAR = countable("PERF_TP_OUTPUT_PIXELS_BILINEAR");
75    auto PERF_TP_OUTPUT_PIXELS_POINT = countable("PERF_TP_OUTPUT_PIXELS_POINT");
76    auto PERF_TP_OUTPUT_PIXELS_ZERO_LOD = countable("PERF_TP_OUTPUT_PIXELS_ZERO_LOD");
77 
78    auto PERF_TSE_INPUT_PRIM  = countable("PERF_TSE_INPUT_PRIM");
79    auto PERF_TSE_CLIPPED_PRIM  = countable("PERF_TSE_CLIPPED_PRIM");
80    auto PERF_TSE_TRIVAL_REJ_PRIM  = countable("PERF_TSE_TRIVAL_REJ_PRIM");
81    auto PERF_TSE_OUTPUT_VISIBLE_PRIM = countable("PERF_TSE_OUTPUT_VISIBLE_PRIM");
82 
83    auto PERF_SP_BUSY_CYCLES  = countable("PERF_SP_BUSY_CYCLES");
84    auto PERF_SP_ALU_WORKING_CYCLES = countable("PERF_SP_ALU_WORKING_CYCLES");
85    auto PERF_SP_EFU_WORKING_CYCLES = countable("PERF_SP_EFU_WORKING_CYCLES");
86    auto PERF_SP_VS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_EFU_INSTRUCTIONS");
87    auto PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS");
88    auto PERF_SP_VS_STAGE_TEX_INSTRUCTIONS = countable("PERF_SP_VS_STAGE_TEX_INSTRUCTIONS");
89    auto PERF_SP_FS_STAGE_EFU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_EFU_INSTRUCTIONS");
90    auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS");
91    auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS");
92    auto PERF_SP_STALL_CYCLES_TP = countable("PERF_SP_STALL_CYCLES_TP");
93    auto PERF_SP_ANY_EU_WORKING_FS_STAGE = countable("PERF_SP_ANY_EU_WORKING_FS_STAGE");
94    auto PERF_SP_ANY_EU_WORKING_VS_STAGE = countable("PERF_SP_ANY_EU_WORKING_VS_STAGE");
95    auto PERF_SP_ANY_EU_WORKING_CS_STAGE = countable("PERF_SP_ANY_EU_WORKING_CS_STAGE");
96 
97    auto PERF_UCHE_STALL_CYCLES_ARBITER = countable("PERF_UCHE_STALL_CYCLES_ARBITER");
98    auto PERF_UCHE_VBIF_READ_BEATS_TP = countable("PERF_UCHE_VBIF_READ_BEATS_TP");
99    auto PERF_UCHE_VBIF_READ_BEATS_VFD = countable("PERF_UCHE_VBIF_READ_BEATS_VFD");
100    auto PERF_UCHE_VBIF_READ_BEATS_SP = countable("PERF_UCHE_VBIF_READ_BEATS_SP");
101    auto PERF_UCHE_READ_REQUESTS_TP = countable("PERF_UCHE_READ_REQUESTS_TP");
102 
103    auto PERF_PC_STALL_CYCLES_VFD = countable("PERF_PC_STALL_CYCLES_VFD");
104    auto PERF_PC_VS_INVOCATIONS = countable("PERF_PC_VS_INVOCATIONS");
105    auto PERF_PC_VERTEX_HITS = countable("PERF_PC_VERTEX_HITS");
106 
107    auto PERF_HLSQ_QUADS = countable("PERF_HLSQ_QUADS"); /* Quads (fragments / 4) produced */
108 
109    auto PERF_CP_NUM_PREEMPTIONS = countable("PERF_CP_NUM_PREEMPTIONS");
110    auto PERF_CP_PREEMPTION_REACTION_DELAY = countable("PERF_CP_PREEMPTION_REACTION_DELAY");
111 
112    /* TODO: resolve() tells there is no PERF_CMPDECMP_VBIF_READ_DATA */
113    // auto PERF_CMPDECMP_VBIF_READ_DATA = countable("PERF_CMPDECMP_VBIF_READ_DATA");
114 
115    /*
116     * And then setup the derived counters that we are exporting to
117     * pps based on the captured countable values.
118     *
119     * We try to expose the same counters as blob:
120     * https://gpuinspector.dev/docs/gpu-counters/qualcomm
121     */
122 
123    counter("GPU Frequency", Counter::Units::Hertz, [=]() {
124          return PERF_CP_ALWAYS_COUNT / time;
125       }
126    );
127 
128    counter("GPU % Utilization", Counter::Units::Percent, [=]() {
129          return percent(PERF_CP_BUSY_CYCLES / time, max_freq);
130       }
131    );
132 
133    counter("TP L1 Cache Misses", Counter::Units::None, [=]() {
134          return PERF_TP_L1_CACHELINE_MISSES / time;
135       }
136    );
137 
138    counter("Shader Core Utilization", Counter::Units::Percent, [=]() {
139          return percent(PERF_SP_BUSY_CYCLES / time, max_freq * info->num_sp_cores);
140       }
141    );
142 
143    /* TODO: verify */
144    counter("(?) % Texture Fetch Stall", Counter::Units::Percent, [=]() {
145          return percent(PERF_SP_STALL_CYCLES_TP / time, max_freq * info->num_sp_cores);
146       }
147    );
148 
149    /* TODO: verify */
150    counter("(?) % Vertex Fetch Stall", Counter::Units::Percent, [=]() {
151          return percent(PERF_PC_STALL_CYCLES_VFD / time, max_freq * info->num_sp_cores);
152       }
153    );
154 
155    counter("L1 Texture Cache Miss Per Pixel", Counter::Units::None, [=]() {
156          return safe_div(PERF_TP_L1_CACHELINE_MISSES, PERF_HLSQ_QUADS * 4);
157       }
158    );
159 
160    counter("% Texture L1 Miss", Counter::Units::Percent, [=]() {
161          return percent(PERF_TP_L1_CACHELINE_MISSES, PERF_TP_L1_CACHELINE_REQUESTS);
162       }
163    );
164 
165    counter("% Texture L2 Miss", Counter::Units::Percent, [=]() {
166          return percent(PERF_UCHE_VBIF_READ_BEATS_TP / 2, PERF_UCHE_READ_REQUESTS_TP);
167       }
168    );
169 
170    /* TODO: verify */
171    counter("(?) % Stalled on System Memory", Counter::Units::Percent, [=]() {
172          return percent(PERF_UCHE_STALL_CYCLES_ARBITER / time, max_freq * info->num_sp_cores);
173       }
174    );
175 
176    counter("Pre-clipped Polygons / Second", Counter::Units::None, [=]() {
177          return PERF_TSE_INPUT_PRIM * (1.f / time);
178       }
179    );
180 
181    counter("% Prims Trivially Rejected", Counter::Units::Percent, [=]() {
182          return percent(PERF_TSE_TRIVAL_REJ_PRIM, PERF_TSE_INPUT_PRIM);
183       }
184    );
185 
186    counter("% Prims Clipped", Counter::Units::Percent, [=]() {
187          return percent(PERF_TSE_CLIPPED_PRIM, PERF_TSE_INPUT_PRIM);
188       }
189    );
190 
191    counter("Average Vertices / Polygon", Counter::Units::None, [=]() {
192          return PERF_PC_VS_INVOCATIONS / PERF_TSE_INPUT_PRIM;
193       }
194    );
195 
196    counter("Reused Vertices / Second", Counter::Units::None, [=]() {
197          return PERF_PC_VERTEX_HITS * (1.f / time);
198       }
199    );
200 
201    counter("Average Polygon Area", Counter::Units::None, [=]() {
202          return safe_div(PERF_HLSQ_QUADS * 4, PERF_TSE_OUTPUT_VISIBLE_PRIM);
203       }
204    );
205 
206    /* TODO: find formula */
207    // counter("% Shaders Busy", Counter::Units::Percent, [=]() {
208    //       return 100.0 * 0;
209    //    }
210    // );
211 
212    counter("Vertices Shaded / Second", Counter::Units::None, [=]() {
213          return PERF_PC_VS_INVOCATIONS * (1.f / time);
214       }
215    );
216 
217    counter("Fragments Shaded / Second", Counter::Units::None, [=]() {
218          return PERF_HLSQ_QUADS * 4 * (1.f / time);
219       }
220    );
221 
222    counter("Vertex Instructions / Second", Counter::Units::None, [=]() {
223          return (PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS +
224                  PERF_SP_VS_STAGE_EFU_INSTRUCTIONS) * (1.f / time);
225       }
226    );
227 
228    counter("Fragment Instructions / Second", Counter::Units::None, [=]() {
229          return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
230                  PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2 +
231                  PERF_SP_FS_STAGE_EFU_INSTRUCTIONS) * (1.f / time);
232       }
233    );
234 
235    counter("Fragment ALU Instructions / Sec (Full)", Counter::Units::None, [=]() {
236          return PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS * (1.f / time);
237       }
238    );
239 
240    counter("Fragment ALU Instructions / Sec (Half)", Counter::Units::None, [=]() {
241          return PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS * (1.f / time);
242       }
243    );
244 
245    counter("Fragment EFU Instructions / Second", Counter::Units::None, [=]() {
246          return PERF_SP_FS_STAGE_EFU_INSTRUCTIONS * (1.f / time);
247       }
248    );
249 
250    counter("Textures / Vertex", Counter::Units::None, [=]() {
251          return safe_div(PERF_SP_VS_STAGE_TEX_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
252       }
253    );
254 
255    counter("Textures / Fragment", Counter::Units::None, [=]() {
256          return safe_div(PERF_TP_OUTPUT_PIXELS, PERF_HLSQ_QUADS * 4);
257       }
258    );
259 
260    counter("ALU / Vertex", Counter::Units::None, [=]() {
261          return safe_div(PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
262       }
263    );
264 
265    counter("EFU / Vertex", Counter::Units::None, [=]() {
266          return safe_div(PERF_SP_VS_STAGE_EFU_INSTRUCTIONS, PERF_PC_VS_INVOCATIONS);
267       }
268    );
269 
270    counter("ALU / Fragment", Counter::Units::None, [=]() {
271          return safe_div(PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
272                          PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2, PERF_HLSQ_QUADS);
273       }
274    );
275 
276    counter("EFU / Fragment", Counter::Units::None, [=]() {
277          return safe_div(PERF_SP_FS_STAGE_EFU_INSTRUCTIONS, PERF_HLSQ_QUADS);
278       }
279    );
280 
281    counter("% Time Shading Vertices", Counter::Units::Percent, [=]() {
282          return percent(PERF_SP_ANY_EU_WORKING_VS_STAGE,
283                         (PERF_SP_ANY_EU_WORKING_VS_STAGE +
284                          PERF_SP_ANY_EU_WORKING_FS_STAGE +
285                          PERF_SP_ANY_EU_WORKING_CS_STAGE));
286       }
287    );
288 
289    counter("% Time Shading Fragments", Counter::Units::Percent, [=]() {
290          return percent(PERF_SP_ANY_EU_WORKING_FS_STAGE,
291                         (PERF_SP_ANY_EU_WORKING_VS_STAGE +
292                          PERF_SP_ANY_EU_WORKING_FS_STAGE +
293                          PERF_SP_ANY_EU_WORKING_CS_STAGE));
294       }
295    );
296 
297    counter("% Time Compute", Counter::Units::Percent, [=]() {
298          return percent(PERF_SP_ANY_EU_WORKING_CS_STAGE,
299                         (PERF_SP_ANY_EU_WORKING_VS_STAGE +
300                          PERF_SP_ANY_EU_WORKING_FS_STAGE +
301                          PERF_SP_ANY_EU_WORKING_CS_STAGE));
302       }
303    );
304 
305    counter("% Shader ALU Capacity Utilized", Counter::Units::Percent, [=]() {
306          return percent((PERF_SP_VS_STAGE_FULL_ALU_INSTRUCTIONS +
307                          PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +
308                          PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / 64,
309                         PERF_SP_BUSY_CYCLES);
310       }
311    );
312 
313    counter("% Time ALUs Working", Counter::Units::Percent, [=]() {
314          return percent(PERF_SP_ALU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES);
315       }
316    );
317 
318    counter("% Time EFUs Working", Counter::Units::Percent, [=]() {
319          return percent(PERF_SP_EFU_WORKING_CYCLES / 2, PERF_SP_BUSY_CYCLES);
320       }
321    );
322 
323    counter("% Anisotropic Filtered", Counter::Units::Percent, [=]() {
324          return percent(PERF_TP_OUTPUT_PIXELS_ANISO, PERF_TP_OUTPUT_PIXELS);
325       }
326    );
327 
328    counter("% Linear Filtered", Counter::Units::Percent, [=]() {
329          return percent(PERF_TP_OUTPUT_PIXELS_BILINEAR, PERF_TP_OUTPUT_PIXELS);
330       }
331    );
332 
333    counter("% Nearest Filtered", Counter::Units::Percent, [=]() {
334          return percent(PERF_TP_OUTPUT_PIXELS_POINT, PERF_TP_OUTPUT_PIXELS);
335       }
336    );
337 
338    counter("% Non-Base Level Textures", Counter::Units::Percent, [=]() {
339          return percent(PERF_TP_OUTPUT_PIXELS_ZERO_LOD, PERF_TP_OUTPUT_PIXELS);
340       }
341    );
342 
343    /* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=63 */
344    // counter("Read Total (Bytes/sec)", Counter::Units::Byte, [=]() {
345    //       return  * (1.f / time);
346    //    }
347    // );
348 
349    /* Reads from KGSL_PERFCOUNTER_GROUP_VBIF countable=84 */
350    // counter("Write Total (Bytes/sec)", Counter::Units::Byte, [=]() {
351    //       return  * (1.f / time);
352    //    }
353    // );
354 
355    /* Cannot get PERF_CMPDECMP_VBIF_READ_DATA countable */
356    // counter("Texture Memory Read BW (Bytes/Second)", Counter::Units::Byte, [=]() {
357    //       return (PERF_CMPDECMP_VBIF_READ_DATA + PERF_UCHE_VBIF_READ_BEATS_TP) * (1.f / time);
358    //    }
359    // );
360 
361    /* TODO: verify */
362    counter("(?) Vertex Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() {
363          return PERF_UCHE_VBIF_READ_BEATS_VFD * 32 * (1.f / time);
364       }
365    );
366 
367    /* TODO: verify */
368    counter("SP Memory Read (Bytes/Second)", Counter::Units::Byte, [=]() {
369          return PERF_UCHE_VBIF_READ_BEATS_SP * 32 * (1.f / time);
370       }
371    );
372 
373    counter("Avg Bytes / Fragment", Counter::Units::Byte, [=]() {
374          return safe_div(PERF_UCHE_VBIF_READ_BEATS_TP * 32, PERF_HLSQ_QUADS * 4);
375       }
376    );
377 
378    counter("Avg Bytes / Vertex", Counter::Units::Byte, [=]() {
379          return safe_div(PERF_UCHE_VBIF_READ_BEATS_VFD * 32, PERF_PC_VS_INVOCATIONS);
380       }
381    );
382 
383    counter("Preemptions / second", Counter::Units::None, [=]() {
384          return PERF_CP_NUM_PREEMPTIONS * (1.f / time);
385       }
386    );
387 
388    counter("Avg Preemption Delay", Counter::Units::None, [=]() {
389          return PERF_CP_PREEMPTION_REACTION_DELAY * (1.f / time);
390       }
391    );
392 }
393 
394 /**
395  * Generate an submit the cmdstream to configure the counter/countable
396  * muxing
397  */
398 void
configure_counters(bool reset,bool wait)399 FreedrenoDriver::configure_counters(bool reset, bool wait)
400 {
401    struct fd_submit *submit = fd_submit_new(pipe);
402    enum fd_ringbuffer_flags flags =
403       (enum fd_ringbuffer_flags)(FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
404    struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000, flags);
405 
406    for (auto countable : countables)
407       countable.configure(ring, reset);
408 
409    struct fd_submit_fence fence = {};
410    util_queue_fence_init(&fence.ready);
411 
412    fd_submit_flush(submit, -1, &fence);
413 
414    util_queue_fence_wait(&fence.ready);
415 
416    fd_ringbuffer_del(ring);
417    fd_submit_del(submit);
418 
419    if (wait)
420       fd_pipe_wait(pipe, &fence.fence);
421 }
422 
423 /**
424  * Read the current counter values and record the time.
425  */
426 void
collect_countables()427 FreedrenoDriver::collect_countables()
428 {
429    last_dump_ts = perfetto::base::GetBootTimeNs().count();
430 
431    for (auto countable : countables)
432       countable.collect();
433 }
434 
435 bool
init_perfcnt()436 FreedrenoDriver::init_perfcnt()
437 {
438    uint64_t val;
439 
440    dev = fd_device_new(drm_device.fd);
441    pipe = fd_pipe_new(dev, FD_PIPE_3D);
442    dev_id = fd_pipe_dev_id(pipe);
443 
444    if (fd_pipe_get_param(pipe, FD_MAX_FREQ, &val)) {
445       PERFETTO_FATAL("Could not get MAX_FREQ");
446       return false;
447    }
448    max_freq = val;
449 
450    if (fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val)) {
451       PERFETTO_ILOG("Could not get SUSPEND_COUNT");
452    } else {
453       suspend_count = val;
454       has_suspend_count = true;
455    }
456 
457    fd_pipe_set_param(pipe, FD_SYSPROF, 1);
458 
459    perfcntrs = fd_perfcntrs(fd_pipe_dev_id(pipe), &num_perfcntrs);
460    if (num_perfcntrs == 0) {
461       PERFETTO_FATAL("No hw counters available");
462       return false;
463    }
464 
465    assigned_counters.resize(num_perfcntrs);
466    assigned_counters.assign(assigned_counters.size(), 0);
467 
468    switch (fd_dev_gen(dev_id)) {
469    case 6:
470       setup_a6xx_counters();
471       break;
472    default:
473       PERFETTO_FATAL("Unsupported GPU: a%03u", fd_dev_gpu_id(dev_id));
474       return false;
475    }
476 
477    state.resize(next_countable_id);
478 
479    for (auto countable : countables)
480       countable.resolve();
481 
482    info = fd_dev_info(dev_id);
483 
484    io = fd_dt_find_io();
485    if (!io) {
486       PERFETTO_FATAL("Could not map GPU I/O space");
487       return false;
488    }
489 
490    configure_counters(true, true);
491    collect_countables();
492 
493    return true;
494 }
495 
496 void
enable_counter(const uint32_t counter_id)497 FreedrenoDriver::enable_counter(const uint32_t counter_id)
498 {
499    enabled_counters.push_back(counters[counter_id]);
500 }
501 
502 void
enable_all_counters()503 FreedrenoDriver::enable_all_counters()
504 {
505    enabled_counters.reserve(counters.size());
506    for (auto &counter : counters) {
507       enabled_counters.push_back(counter);
508    }
509 }
510 
511 void
enable_perfcnt(const uint64_t)512 FreedrenoDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */)
513 {
514 }
515 
516 bool
dump_perfcnt()517 FreedrenoDriver::dump_perfcnt()
518 {
519    if (has_suspend_count) {
520       uint64_t val;
521 
522       fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val);
523 
524       if (suspend_count != val) {
525          PERFETTO_ILOG("Device had suspended!");
526 
527          suspend_count = val;
528 
529          configure_counters(true, true);
530          collect_countables();
531 
532          /* We aren't going to have anything sensible by comparing
533           * current values to values from prior to the suspend, so
534           * just skip this sampling period.
535           */
536          return false;
537       }
538    }
539 
540    auto last_ts = last_dump_ts;
541 
542    /* Capture the timestamp from the *start* of the sampling period: */
543    last_capture_ts = last_dump_ts;
544 
545    collect_countables();
546 
547    auto elapsed_time_ns = last_dump_ts - last_ts;
548 
549    time = (float)elapsed_time_ns / 1000000000.0;
550 
551    /* On older kernels that dont' support querying the suspend-
552     * count, just send configuration cmdstream regularly to keep
553     * the GPU alive and correctly configured for the countables
554     * we want
555     */
556    if (!has_suspend_count) {
557       configure_counters(false, false);
558    }
559 
560    return true;
561 }
562 
next()563 uint64_t FreedrenoDriver::next()
564 {
565    auto ret = last_capture_ts;
566    last_capture_ts = 0;
567    return ret;
568 }
569 
disable_perfcnt()570 void FreedrenoDriver::disable_perfcnt()
571 {
572    /* There isn't really any disable, only reconfiguring which countables
573     * get muxed to which counters
574     */
575 }
576 
577 /*
578  * Countable
579  */
580 
581 FreedrenoDriver::Countable
countable(std::string name)582 FreedrenoDriver::countable(std::string name)
583 {
584    auto countable = Countable(this, name);
585    countables.emplace_back(countable);
586    return countable;
587 }
588 
Countable(FreedrenoDriver * d,std::string name)589 FreedrenoDriver::Countable::Countable(FreedrenoDriver *d, std::string name)
590    : id {d->next_countable_id++}, d {d}, name {name}
591 {
592 }
593 
594 /* Emit register writes on ring to configure counter/countable muxing: */
595 void
configure(struct fd_ringbuffer * ring,bool reset)596 FreedrenoDriver::Countable::configure(struct fd_ringbuffer *ring, bool reset)
597 {
598    const struct fd_perfcntr_countable *countable = d->state[id].countable;
599    const struct fd_perfcntr_counter   *counter   = d->state[id].counter;
600 
601    OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
602 
603    if (counter->enable && reset) {
604       OUT_PKT4(ring, counter->enable, 1);
605       OUT_RING(ring, 0);
606    }
607 
608    if (counter->clear && reset) {
609       OUT_PKT4(ring, counter->clear, 1);
610       OUT_RING(ring, 1);
611 
612       OUT_PKT4(ring, counter->clear, 1);
613       OUT_RING(ring, 0);
614    }
615 
616    OUT_PKT4(ring, counter->select_reg, 1);
617    OUT_RING(ring, countable->selector);
618 
619    if (counter->enable && reset) {
620       OUT_PKT4(ring, counter->enable, 1);
621       OUT_RING(ring, 1);
622    }
623 }
624 
625 /* Collect current counter value and calculate delta since last sample: */
626 void
collect()627 FreedrenoDriver::Countable::collect()
628 {
629    const struct fd_perfcntr_counter *counter = d->state[id].counter;
630 
631    d->state[id].last_value = d->state[id].value;
632 
633    uint32_t *reg_lo = (uint32_t *)d->io + counter->counter_reg_lo;
634    uint32_t *reg_hi = (uint32_t *)d->io + counter->counter_reg_hi;
635 
636    uint32_t lo = *reg_lo;
637    uint32_t hi = *reg_hi;
638 
639    d->state[id].value = lo | ((uint64_t)hi << 32);
640 }
641 
642 /* Resolve the countable and assign next counter from it's group: */
643 void
resolve()644 FreedrenoDriver::Countable::resolve()
645 {
646    for (unsigned i = 0; i < d->num_perfcntrs; i++) {
647       const struct fd_perfcntr_group *g = &d->perfcntrs[i];
648       for (unsigned j = 0; j < g->num_countables; j++) {
649          const struct fd_perfcntr_countable *c = &g->countables[j];
650          if (name == c->name) {
651             d->state[id].countable = c;
652 
653             /* Assign a counter from the same group: */
654             assert(d->assigned_counters[i] < g->num_counters);
655             d->state[id].counter = &g->counters[d->assigned_counters[i]++];
656 
657             std::cout << "Countable: " << name << ", group=" << g->name <<
658                   ", counter=" << d->assigned_counters[i] - 1 << "\n";
659 
660             return;
661          }
662       }
663    }
664    unreachable("no such countable!");
665 }
666 
667 uint64_t
get_value() const668 FreedrenoDriver::Countable::get_value() const
669 {
670    return d->state[id].value - d->state[id].last_value;
671 }
672 
673 /*
674  * DerivedCounter
675  */
676 
DerivedCounter(FreedrenoDriver * d,std::string name,Counter::Units units,std::function<int64_t ()> derive)677 FreedrenoDriver::DerivedCounter::DerivedCounter(FreedrenoDriver *d, std::string name,
678                                                 Counter::Units units,
679                                                 std::function<int64_t()> derive)
680    : Counter(d->next_counter_id++, name, 0)
681 {
682    std::cout << "DerivedCounter: " << name << ", id=" << id << "\n";
683    this->units = units;
684    set_getter([=](const Counter &c, const Driver &d) {
685          return derive();
686       }
687    );
688 }
689 
690 FreedrenoDriver::DerivedCounter
counter(std::string name,Counter::Units units,std::function<int64_t ()> derive)691 FreedrenoDriver::counter(std::string name, Counter::Units units,
692                          std::function<int64_t()> derive)
693 {
694    auto counter = DerivedCounter(this, name, units, derive);
695    counters.emplace_back(counter);
696    return counter;
697 }
698 
699 uint32_t
gpu_clock_id() const700 FreedrenoDriver::gpu_clock_id() const
701 {
702    return perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME;
703 }
704 
705 uint64_t
gpu_timestamp() const706 FreedrenoDriver::gpu_timestamp() const
707 {
708    return perfetto::base::GetBootTimeNs().count();
709 }
710 
711 } // namespace pps
712