• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2017 Rob Clark <robclark@freedesktop.org>
3  * Copyright © 2018 Google, Inc.
4  * SPDX-License-Identifier: MIT
5  *
6  * Authors:
7  *    Rob Clark <robclark@freedesktop.org>
8  */
9 
10 #define FD_BO_NO_HARDPIN 1
11 
12 /* NOTE: see https://gitlab.freedesktop.org/freedreno/freedreno/-/wikis/A5xx-Queries */
13 
14 #include "freedreno_query_acc.h"
15 #include "freedreno_resource.h"
16 
17 #include "fd6_context.h"
18 #include "fd6_emit.h"
19 #include "fd6_query.h"
20 
21 #include "fd6_pack.h"
22 
/* g++ is picky about offsets that cannot be resolved at compile time, so
 * roll our own __offsetof().  The statement-expression form lets it work on
 * array-element fields (eg. start[0]) where plain offsetof() would not.
 */
#define __offsetof(type, field)                                                \
   ({ type _x = {}; ((uint8_t *)&_x.field) - ((uint8_t *)&_x);})
28 
/* Per-query sample layout for occlusion/timestamp queries.  The GPU writes
 * raw counter snapshots into start/stop, and the accumulated value across
 * resume/pause intervals is maintained in result (note the a7xx
 * WRITE_ACCUM_SAMP_COUNT_DIFF path relies on exactly this start/result/stop
 * ordering, see occlusion_pause()).
 */
struct PACKED fd6_query_sample {
   struct fd_acc_query_sample base;

   /* The RB_SAMPLE_COUNT_ADDR destination needs to be 16-byte aligned: */
   uint64_t pad;

   uint64_t start;
   uint64_t result;
   uint64_t stop;
};
FD_DEFINE_CAST(fd_acc_query_sample, fd6_query_sample);
40 
/* offset of a single field of an array of fd6_query_sample: expands to the
 * (bo, offset, or, shift) argument list expected by OUT_RELOC():
 */
#define query_sample_idx(aq, idx, field)                                       \
   fd_resource((aq)->prsc)->bo,                                                \
      (idx * sizeof(struct fd6_query_sample)) +                                \
         offsetof(struct fd6_query_sample, field),                             \
      0, 0

/* offset of a single field of fd6_query_sample: */
#define query_sample(aq, field) query_sample_idx(aq, 0, field)
50 
51 /*
52  * Occlusion Query:
53  *
54  * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
55  * interpret results
56  */
57 
58 template <chip CHIP>
59 static void
occlusion_resume(struct fd_acc_query * aq,struct fd_batch * batch)60 occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
61 {
62    struct fd_context *ctx = batch->ctx;
63    struct fd_ringbuffer *ring = batch->draw;
64 
65    ASSERT_ALIGNED(struct fd6_query_sample, start, 16);
66 
67    OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
68    OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
69 
70    if (!ctx->screen->info->a7xx.has_event_write_sample_count) {
71       OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
72       OUT_RELOC(ring, query_sample(aq, start));
73 
74       fd6_event_write<CHIP>(ctx, ring, FD_ZPASS_DONE);
75 
76       /* Copied from blob's cmdstream, not sure why it is done. */
77       if (CHIP == A7XX) {
78          fd6_event_write<CHIP>(ctx, ring, FD_CCU_CLEAN_DEPTH);
79       }
80    } else {
81       OUT_PKT(ring, CP_EVENT_WRITE7,
82          CP_EVENT_WRITE7_0(
83             .event = ZPASS_DONE,
84             .write_sample_count = true,
85          ),
86          EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, start)),
87       );
88       OUT_PKT(ring, CP_EVENT_WRITE7,
89          CP_EVENT_WRITE7_0(
90             .event = ZPASS_DONE,
91             .write_sample_count = true,
92             .sample_count_end_offset = true,
93             .write_accum_sample_count_diff = true,
94          ),
95          EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, start)),
96       );
97    }
98 
99    ctx->occlusion_queries_active++;
100 
101    /* Just directly bash the gen specific LRZ dirty bit, since we don't
102     * need to re-emit any other LRZ related state:
103     */
104    ctx->gen_dirty |= FD6_GROUP_LRZ;
105 }
106 
/* Stop occlusion-query counting for this interval: snapshot the "stop"
 * sample count and accumulate (stop - start) into "result".
 */
template <chip CHIP>
static void
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->draw;

   if (!ctx->screen->info->a7xx.has_event_write_sample_count) {
      /* Seed "stop" with a sentinel so the epilogue can poll for the
       * deferred ZPASS_DONE write actually landing:
       */
      OUT_PKT7(ring, CP_MEM_WRITE, 4);
      OUT_RELOC(ring, query_sample(aq, stop));
      OUT_RING(ring, 0xffffffff);
      OUT_RING(ring, 0xffffffff);

      OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
   }

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   ASSERT_ALIGNED(struct fd6_query_sample, stop, 16);

   if (!ctx->screen->info->a7xx.has_event_write_sample_count) {
      OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
      OUT_RELOC(ring, query_sample(aq, stop));

      fd6_event_write<CHIP>(batch->ctx, ring, FD_ZPASS_DONE);

      /* To avoid stalling in the draw buffer, emit the code to compute the
       * counter delta in the epilogue ring.
       */
      struct fd_ringbuffer *epilogue = fd_batch_get_tile_epilogue(batch);

      /* Wait until "stop" is no longer the sentinel written above: */
      OUT_PKT7(epilogue, CP_WAIT_REG_MEM, 6);
      OUT_RING(epilogue, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_NE) |
                            CP_WAIT_REG_MEM_0_POLL(POLL_MEMORY));
      OUT_RELOC(epilogue, query_sample(aq, stop));
      OUT_RING(epilogue, CP_WAIT_REG_MEM_3_REF(0xffffffff));
      OUT_RING(epilogue, CP_WAIT_REG_MEM_4_MASK(0xffffffff));
      OUT_RING(epilogue, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));

      /* result += stop - start: */
      OUT_PKT7(epilogue, CP_MEM_TO_MEM, 9);
      OUT_RING(epilogue, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
      OUT_RELOC(epilogue, query_sample(aq, result)); /* dst */
      OUT_RELOC(epilogue, query_sample(aq, result)); /* srcA */
      OUT_RELOC(epilogue, query_sample(aq, stop));   /* srcB */
      OUT_RELOC(epilogue, query_sample(aq, start));  /* srcC */
   } else {
      OUT_PKT(ring, CP_EVENT_WRITE7,
         CP_EVENT_WRITE7_0(
            .event = ZPASS_DONE,
            .write_sample_count = true,
         ),
         EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, stop)),
      );
      OUT_PKT(ring, CP_EVENT_WRITE7,
         CP_EVENT_WRITE7_0(
            .event = ZPASS_DONE,
            .write_sample_count = true,
            .sample_count_end_offset = true,
            .write_accum_sample_count_diff = true,
         ),
         /* Note: SQE is adding offsets to the iova, SAMPLE_COUNT_END_OFFSET causes
          * the result to be written to iova+16, and WRITE_ACCUM_SAMP_COUNT_DIFF
          * does *(iova + 8) += *(iova + 16) - *iova
          *
          * It just so happens this is the layout we already use for
          * start/result/stop, so we just give the start address in all cases.
          */
         EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, start)),
      );
   }

   assert(ctx->occlusion_queries_active > 0);
   ctx->occlusion_queries_active--;

   /* Just directly bash the gen specific LRZ dirty bit, since we don't
    * need to re-emit any other LRZ related state:
    */
   ctx->gen_dirty |= FD6_GROUP_LRZ;
}
188 
189 static void
occlusion_counter_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)190 occlusion_counter_result(struct fd_acc_query *aq,
191                          struct fd_acc_query_sample *s,
192                          union pipe_query_result *result)
193 {
194    struct fd6_query_sample *sp = fd6_query_sample(s);
195    result->u64 = sp->result;
196 }
197 
198 static void
occlusion_counter_result_resource(struct fd_acc_query * aq,struct fd_ringbuffer * ring,enum pipe_query_value_type result_type,int index,struct fd_resource * dst,unsigned offset)199 occlusion_counter_result_resource(struct fd_acc_query *aq, struct fd_ringbuffer *ring,
200                                   enum pipe_query_value_type result_type,
201                                   int index, struct fd_resource *dst,
202                                   unsigned offset)
203 {
204    copy_result(ring, result_type, dst, offset, fd_resource(aq->prsc),
205                offsetof(struct fd6_query_sample, result));
206 }
207 
208 static void
occlusion_predicate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)209 occlusion_predicate_result(struct fd_acc_query *aq,
210                            struct fd_acc_query_sample *s,
211                            union pipe_query_result *result)
212 {
213    struct fd6_query_sample *sp = fd6_query_sample(s);
214    result->b = !!sp->result;
215 }
216 
217 static void
occlusion_predicate_result_resource(struct fd_acc_query * aq,struct fd_ringbuffer * ring,enum pipe_query_value_type result_type,int index,struct fd_resource * dst,unsigned offset)218 occlusion_predicate_result_resource(struct fd_acc_query *aq, struct fd_ringbuffer *ring,
219                                     enum pipe_query_value_type result_type,
220                                     int index, struct fd_resource *dst,
221                                     unsigned offset)
222 {
223    /* This is a bit annoying but we need to turn the result into a one or
224     * zero.. to do this use a CP_COND_WRITE to overwrite the result with
225     * a one if it is non-zero.  This doesn't change the results if the
226     * query is also read on the CPU (ie. occlusion_predicate_result()).
227     */
228    OUT_PKT7(ring, CP_COND_WRITE5, 9);
229    OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_NE) |
230                   CP_WAIT_REG_MEM_0_POLL(POLL_MEMORY) |
231                   CP_COND_WRITE5_0_WRITE_MEMORY);
232    OUT_RELOC(ring, query_sample(aq, result)); /* POLL_ADDR_LO/HI */
233    OUT_RING(ring, CP_COND_WRITE5_3_REF(0));
234    OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
235    OUT_RELOC(ring, query_sample(aq, result)); /* WRITE_ADDR_LO/HI */
236    OUT_RING(ring, 1);
237    OUT_RING(ring, 0);
238 
239    copy_result(ring, result_type, dst, offset, fd_resource(aq->prsc),
240                offsetof(struct fd6_query_sample, result));
241 }
242 
/* Provider for PIPE_QUERY_OCCLUSION_COUNTER: raw accumulated sample count. */
template <chip CHIP>
static const struct fd_acc_sample_provider occlusion_counter = {
   .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
   .size = sizeof(struct fd6_query_sample),
   .resume = occlusion_resume<CHIP>,
   .pause = occlusion_pause<CHIP>,
   .result = occlusion_counter_result,
   .result_resource = occlusion_counter_result_resource,
};
252 
/* Provider for PIPE_QUERY_OCCLUSION_PREDICATE: same sampling as the counter
 * query, but the result is interpreted as a boolean.
 */
template <chip CHIP>
static const struct fd_acc_sample_provider occlusion_predicate = {
   .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
   .size = sizeof(struct fd6_query_sample),
   .resume = occlusion_resume<CHIP>,
   .pause = occlusion_pause<CHIP>,
   .result = occlusion_predicate_result,
   .result_resource = occlusion_predicate_result_resource,
};
262 
/* Provider for PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: implemented
 * identically to the precise predicate (an exact result is a legal
 * conservative result).
 */
template <chip CHIP>
static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
   .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
   .size = sizeof(struct fd6_query_sample),
   .resume = occlusion_resume<CHIP>,
   .pause = occlusion_pause<CHIP>,
   .result = occlusion_predicate_result,
   .result_resource = occlusion_predicate_result_resource,
};
272 
273 /*
274  * Timestamp Queries:
275  */
276 
/* Record a GPU timestamp into the "start" slot.  Shared by the timestamp
 * and time-elapsed queries (for the latter this marks interval start).
 */
template <chip CHIP>
static void
timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->draw;

   fd6_record_ts<CHIP>(ring, query_sample(aq, start));
}
285 
/* End a time-elapsed interval: record the "stop" timestamp and accumulate
 * the delta into "result" on the GPU.
 */
template <chip CHIP>
static void
time_elapsed_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   fd6_record_ts<CHIP>(ring, query_sample(aq, stop));

   /* Make sure the timestamp write has landed before CP reads it back: */
   OUT_WFI5(ring);

   /* result += stop - start: */
   OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
   OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
   OUT_RELOC(ring, query_sample(aq, result)); /* dst */
   OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
   OUT_RELOC(ring, query_sample(aq, stop));   /* srcB */
   OUT_RELOC(ring, query_sample(aq, start));  /* srcC */
}
304 
/* Pause hook for the plain timestamp query. */
static void
timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
   /* We captured a timestamp in timestamp_resume(), nothing to do here. */
}
310 
/* timestamp logging for u_trace: write a GPU timestamp at the given offset
 * into an arbitrary bo (which must be attached to the ring first so the
 * kernel knows about the reference).
 */
template <chip CHIP>
static void
record_timestamp(struct fd_ringbuffer *ring, struct fd_bo *bo, unsigned offset)
{
   fd_ringbuffer_attach_bo(ring, bo);
   fd6_record_ts<CHIP>(ring, bo, offset, 0, 0);
}
319 
320 static void
time_elapsed_accumulate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)321 time_elapsed_accumulate_result(struct fd_acc_query *aq,
322                                struct fd_acc_query_sample *s,
323                                union pipe_query_result *result)
324 {
325    struct fd6_query_sample *sp = fd6_query_sample(s);
326    result->u64 = ticks_to_ns(sp->result);
327 }
328 
329 static void
time_elapsed_result_resource(struct fd_acc_query * aq,struct fd_ringbuffer * ring,enum pipe_query_value_type result_type,int index,struct fd_resource * dst,unsigned offset)330 time_elapsed_result_resource(struct fd_acc_query *aq, struct fd_ringbuffer *ring,
331                              enum pipe_query_value_type result_type,
332                              int index, struct fd_resource *dst,
333                              unsigned offset)
334 {
335    // TODO ticks_to_ns conversion would require spinning up a compute shader?
336    copy_result(ring, result_type, dst, offset, fd_resource(aq->prsc),
337                offsetof(struct fd6_query_sample, result));
338 }
339 
340 static void
timestamp_accumulate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)341 timestamp_accumulate_result(struct fd_acc_query *aq,
342                             struct fd_acc_query_sample *s,
343                             union pipe_query_result *result)
344 {
345    struct fd6_query_sample *sp = fd6_query_sample(s);
346    result->u64 = ticks_to_ns(sp->start);
347 }
348 
349 static void
timestamp_result_resource(struct fd_acc_query * aq,struct fd_ringbuffer * ring,enum pipe_query_value_type result_type,int index,struct fd_resource * dst,unsigned offset)350 timestamp_result_resource(struct fd_acc_query *aq, struct fd_ringbuffer *ring,
351                           enum pipe_query_value_type result_type,
352                           int index, struct fd_resource *dst,
353                           unsigned offset)
354 {
355    // TODO ticks_to_ns conversion would require spinning up a compute shader?
356    copy_result(ring, result_type, dst, offset, fd_resource(aq->prsc),
357                offsetof(struct fd6_query_sample, start));
358 }
359 
/* Provider for PIPE_QUERY_TIME_ELAPSED.  "always" because timestamps are
 * sampled regardless of whether any draws are batched in the interval.
 */
template <chip CHIP>
static const struct fd_acc_sample_provider time_elapsed = {
   .query_type = PIPE_QUERY_TIME_ELAPSED,
   .always = true,
   .size = sizeof(struct fd6_query_sample),
   .resume = timestamp_resume<CHIP>,
   .pause = time_elapsed_pause<CHIP>,
   .result = time_elapsed_accumulate_result,
   .result_resource = time_elapsed_result_resource,
};
370 
371 /* NOTE: timestamp query isn't going to give terribly sensible results
372  * on a tiler.  But it is needed by qapitrace profile heatmap.  If you
373  * add in a binning pass, the results get even more non-sensical.  So
374  * we just return the timestamp on the last tile and hope that is
375  * kind of good enough.
376  */
377 
/* Provider for PIPE_QUERY_TIMESTAMP (see the caveat about tilers above). */
template <chip CHIP>
static const struct fd_acc_sample_provider timestamp = {
   .query_type = PIPE_QUERY_TIMESTAMP,
   .always = true,
   .size = sizeof(struct fd6_query_sample),
   .resume = timestamp_resume<CHIP>,
   .pause = timestamp_pause,
   .result = timestamp_accumulate_result,
   .result_resource = timestamp_result_resource,
};
388 
/* Sample layout for pipeline-statistics queries: raw RBBM_PRIMCTR snapshots
 * in start/stop, accumulated delta in result.
 */
struct PACKED fd6_pipeline_stats_sample {
   struct fd_acc_query_sample base;

   uint64_t start, stop, result;
};
FD_DEFINE_CAST(fd_acc_query_sample, fd6_pipeline_stats_sample);
395 
/* Emit a reloc to one field of the pipeline-stats sample.  Note this
 * expands to a complete statement (trailing semicolon included).
 */
#define stats_reloc(ring, aq, field)                                           \
   OUT_RELOC(ring, fd_resource((aq)->prsc)->bo,                                \
             offsetof(struct fd6_pipeline_stats_sample, field), 0, 0);
399 
400 /* Mapping of counters to pipeline stats:
401  *
402  *   Gallium (PIPE_STAT_QUERY_x) | Vulkan (VK_QUERY_PIPELINE_STATISTIC_x_BIT) | hw counter
403  *   ----------------------------+--------------------------------------------+----------------
404  *   IA_VERTICES                 | INPUT_ASSEMBLY_VERTICES                    | RBBM_PRIMCTR_0
405  *   IA_PRIMITIVES               | INPUT_ASSEMBLY_PRIMITIVES                  | RBBM_PRIMCTR_1
406  *   VS_INVOCATIONS              | VERTEX_SHADER_INVOCATIONS                  | RBBM_PRIMCTR_2
407  *   GS_INVOCATIONS              | GEOMETRY_SHADER_INVOCATIONS                | RBBM_PRIMCTR_5
408  *   GS_PRIMITIVES               | GEOMETRY_SHADER_PRIMITIVES                 | RBBM_PRIMCTR_6
409  *   C_INVOCATIONS               | CLIPPING_INVOCATIONS                       | RBBM_PRIMCTR_7
410  *   C_PRIMITIVES                | CLIPPING_PRIMITIVES                        | RBBM_PRIMCTR_8
411  *   PS_INVOCATIONS              | FRAGMENT_SHADER_INVOCATIONS                | RBBM_PRIMCTR_9
412  *   HS_INVOCATIONS              | TESSELLATION_CONTROL_SHADER_PATCHES        | RBBM_PRIMCTR_3
413  *   DS_INVOCATIONS              | TESSELLATION_EVALUATION_SHADER_INVOCATIONS | RBBM_PRIMCTR_4
414  *   CS_INVOCATIONS              | COMPUTE_SHADER_INVOCATIONS                 | RBBM_PRIMCTR_10
415  */
416 
/* The RBBM primitive counters are started/stopped in three independent
 * groups; queries of the same group share one start/stop event pair.
 */
enum stats_type {
   STATS_PRIMITIVE,
   STATS_FRAGMENT,
   STATS_COMPUTE,
};
422 
/* Start/stop event pair for each counter group, indexed by stats_type: */
static const struct {
   enum fd_gpu_event start, stop;
} stats_counter_events[] = {
      [STATS_PRIMITIVE] = { FD_START_PRIMITIVE_CTRS, FD_STOP_PRIMITIVE_CTRS },
      [STATS_FRAGMENT]  = { FD_START_FRAGMENT_CTRS,  FD_STOP_FRAGMENT_CTRS },
      [STATS_COMPUTE]   = { FD_START_COMPUTE_CTRS,   FD_STOP_COMPUTE_CTRS },
};
430 
431 static enum stats_type
get_stats_type(struct fd_acc_query * aq)432 get_stats_type(struct fd_acc_query *aq)
433 {
434    if (aq->provider->query_type == PIPE_QUERY_PRIMITIVES_GENERATED)
435       return STATS_PRIMITIVE;
436 
437    switch (aq->base.index) {
438    case PIPE_STAT_QUERY_PS_INVOCATIONS: return STATS_FRAGMENT;
439    case PIPE_STAT_QUERY_CS_INVOCATIONS: return STATS_COMPUTE;
440    default:
441       return STATS_PRIMITIVE;
442    }
443 }
444 
445 static unsigned
stats_counter_index(struct fd_acc_query * aq)446 stats_counter_index(struct fd_acc_query *aq)
447 {
448    if (aq->provider->query_type == PIPE_QUERY_PRIMITIVES_GENERATED)
449       return 7;
450 
451    switch (aq->base.index) {
452    case PIPE_STAT_QUERY_IA_VERTICES:    return 0;
453    case PIPE_STAT_QUERY_IA_PRIMITIVES:  return 1;
454    case PIPE_STAT_QUERY_VS_INVOCATIONS: return 2;
455    case PIPE_STAT_QUERY_GS_INVOCATIONS: return 5;
456    case PIPE_STAT_QUERY_GS_PRIMITIVES:  return 6;
457    case PIPE_STAT_QUERY_C_INVOCATIONS:  return 7;
458    case PIPE_STAT_QUERY_C_PRIMITIVES:   return 8;
459    case PIPE_STAT_QUERY_PS_INVOCATIONS: return 9;
460    case PIPE_STAT_QUERY_HS_INVOCATIONS: return 3;
461    case PIPE_STAT_QUERY_DS_INVOCATIONS: return 4;
462    case PIPE_STAT_QUERY_CS_INVOCATIONS: return 10;
463    default:
464       return 0;
465    }
466 }
467 
/* Debug-only dump of a pipeline-stats sample; labels[] is indexed by the
 * RBBM_PRIMCTR counter number (see stats_counter_index()).  Compiled out
 * unless DEBUG_COUNTERS is defined.
 */
static void
log_pipeline_stats(struct fd6_pipeline_stats_sample *ps, unsigned idx)
{
#ifdef DEBUG_COUNTERS
   const char *labels[] = {
      "IA_VERTICES",
      "IA_PRIMITIVES",
      "VS_INVOCATIONS",
      "HS_INVOCATIONS",
      "DS_INVOCATIONS",
      "GS_INVOCATIONS",
      "GS_PRIMITIVES",
      "C_INVOCATIONS",
      "C_PRIMITIVES",
      "PS_INVOCATIONS",
      "CS_INVOCATIONS",
   };

   mesa_logd("  counter\t\tstart\t\t\tstop\t\t\tdiff");
   mesa_logd("  RBBM_PRIMCTR_%d\t0x%016" PRIx64 "\t0x%016" PRIx64 "\t%" PRIi64 "\t%s",
             idx, ps->start, ps->stop, ps->stop - ps->start, labels[idx]);
#endif
}
491 
/* Start a pipeline-stats interval: snapshot the counter into "start" and
 * start the hw counter group if this is the first active query of its type.
 */
template <chip CHIP>
static void
pipeline_stats_resume(struct fd_acc_query *aq, struct fd_batch *batch)
   assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;
   enum stats_type type = get_stats_type(aq);
   unsigned idx = stats_counter_index(aq);
   unsigned reg = REG_A6XX_RBBM_PRIMCTR_0_LO + (2 * idx);

   /* Each counter is a 64b LO/HI register pair, hence the 2 * idx stride
    * and CNT(2) below.  WFI so prior draws are reflected in the counter:
    */
   OUT_WFI5(ring);

   OUT_PKT7(ring, CP_REG_TO_MEM, 3);
   OUT_RING(ring, CP_REG_TO_MEM_0_64B |
                  CP_REG_TO_MEM_0_CNT(2) |
                  CP_REG_TO_MEM_0_REG(reg));
   stats_reloc(ring, aq, start);

   assert(type < ARRAY_SIZE(batch->pipeline_stats_queries_active));

   /* Only the first query of a given type starts the hw counters: */
   if (!batch->pipeline_stats_queries_active[type])
      fd6_event_write<CHIP>(batch->ctx, ring, stats_counter_events[type].start);
   batch->pipeline_stats_queries_active[type]++;
}
516 
517 template <chip CHIP>
518 static void
pipeline_stats_pause(struct fd_acc_query * aq,struct fd_batch * batch)519 pipeline_stats_pause(struct fd_acc_query *aq, struct fd_batch *batch)
520    assert_dt
521 {
522    struct fd_ringbuffer *ring = batch->draw;
523    enum stats_type type = get_stats_type(aq);
524    unsigned idx = stats_counter_index(aq);
525    unsigned reg = REG_A6XX_RBBM_PRIMCTR_0_LO + (2 * idx);
526 
527    OUT_WFI5(ring);
528 
529    /* snapshot the end values: */
530    OUT_PKT7(ring, CP_REG_TO_MEM, 3);
531    OUT_RING(ring, CP_REG_TO_MEM_0_64B |
532                   CP_REG_TO_MEM_0_CNT(2) |
533                   CP_REG_TO_MEM_0_REG(reg));
534    stats_reloc(ring, aq, stop);
535 
536    assert(type < ARRAY_SIZE(batch->pipeline_stats_queries_active));
537    assert(batch->pipeline_stats_queries_active[type] > 0);
538 
539    batch->pipeline_stats_queries_active[type]--;
540    if (batch->pipeline_stats_queries_active[type])
541       fd6_event_write<CHIP>(batch->ctx, ring, stats_counter_events[type].stop);
542 
543    /* result += stop - start: */
544    OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
545    OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C | 0x40000000);
546    stats_reloc(ring, aq, result);
547    stats_reloc(ring, aq, result);
548    stats_reloc(ring, aq, stop)
549    stats_reloc(ring, aq, start);
550 }
551 
552 static void
pipeline_stats_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)553 pipeline_stats_result(struct fd_acc_query *aq,
554                       struct fd_acc_query_sample *s,
555                       union pipe_query_result *result)
556 {
557    struct fd6_pipeline_stats_sample *ps = fd6_pipeline_stats_sample(s);
558 
559    log_pipeline_stats(ps, stats_counter_index(aq));
560 
561    result->u64 = ps->result;
562 }
563 
564 static void
pipeline_stats_result_resource(struct fd_acc_query * aq,struct fd_ringbuffer * ring,enum pipe_query_value_type result_type,int index,struct fd_resource * dst,unsigned offset)565 pipeline_stats_result_resource(struct fd_acc_query *aq,
566                                struct fd_ringbuffer *ring,
567                                enum pipe_query_value_type result_type,
568                                int index, struct fd_resource *dst,
569                                unsigned offset)
570 {
571    copy_result(ring, result_type, dst, offset, fd_resource(aq->prsc),
572                offsetof(struct fd6_pipeline_stats_sample, result));
573 }
574 
/* Provider for PIPE_QUERY_PRIMITIVES_GENERATED, implemented via the
 * clipper-invocations pipeline counter (see get_stats_type()).
 */
template <chip CHIP>
static const struct fd_acc_sample_provider primitives_generated = {
   .query_type = PIPE_QUERY_PRIMITIVES_GENERATED,
   .size = sizeof(struct fd6_pipeline_stats_sample),
   .resume = pipeline_stats_resume<CHIP>,
   .pause = pipeline_stats_pause<CHIP>,
   .result = pipeline_stats_result,
   .result_resource = pipeline_stats_result_resource,
};
584 
/* Provider for PIPE_QUERY_PIPELINE_STATISTICS_SINGLE; the counter is
 * selected by aq->base.index (see stats_counter_index()).
 */
template <chip CHIP>
static const struct fd_acc_sample_provider pipeline_statistics_single = {
   .query_type = PIPE_QUERY_PIPELINE_STATISTICS_SINGLE,
   .size = sizeof(struct fd6_pipeline_stats_sample),
   .resume = pipeline_stats_resume<CHIP>,
   .pause = pipeline_stats_pause<CHIP>,
   .result = pipeline_stats_result,
   .result_resource = pipeline_stats_result_resource,
};
594 
/* Sample layout for stream-output primitive counts: per-stream emitted/
 * generated snapshots written by FD_WRITE_PRIMITIVE_COUNTS, plus the
 * accumulated totals in "result".
 */
struct PACKED fd6_primitives_sample {
   struct fd_acc_query_sample base;

   /* VPC_SO_STREAM_COUNTS dest address must be 32b aligned: */
   uint64_t pad[3];

   struct {
      uint64_t emitted, generated;
   } start[4], stop[4], result;
};
FD_DEFINE_CAST(fd_acc_query_sample, fd6_primitives_sample);
606 
/* Emit a reloc to one field of the primitives sample.  Uses __offsetof()
 * (not offsetof()) because the field may be an array element, and expands
 * to a complete statement (trailing semicolon included).
 */
#define primitives_reloc(ring, aq, field)                                      \
   OUT_RELOC(ring, fd_resource((aq)->prsc)->bo,                                \
             __offsetof(struct fd6_primitives_sample, field), 0, 0);
610 
/* Debug-only dump of a primitives sample (compiled out unless
 * DEBUG_COUNTERS is defined).  Each row now prints the field its label
 * names; previously the "emitted" rows printed the generated counts and
 * vice versa.
 */
static void
log_primitives_sample(struct fd6_primitives_sample *ps)
{
#ifdef DEBUG_COUNTERS
   mesa_logd("  so counts");
   for (int i = 0; i < ARRAY_SIZE(ps->start); i++) {
      mesa_logd("  CHANNEL %d emitted\t0x%016" PRIx64 "\t0x%016" PRIx64
             "\t%" PRIi64,
             i, ps->start[i].emitted, ps->stop[i].emitted,
             ps->stop[i].emitted - ps->start[i].emitted);
      mesa_logd("  CHANNEL %d generated\t0x%016" PRIx64 "\t0x%016" PRIx64
             "\t%" PRIi64,
             i, ps->start[i].generated, ps->stop[i].generated,
             ps->stop[i].generated - ps->start[i].generated);
   }

   mesa_logd("generated %" PRIu64 ", emitted %" PRIu64, ps->result.generated,
          ps->result.emitted);
#endif
}
631 
/* Start a primitives-emitted interval: point VPC_SO_STREAM_COUNTS at the
 * "start" slots and ask the hw to dump the per-stream counters there.
 */
template <chip CHIP>
static void
primitives_emitted_resume(struct fd_acc_query *aq,
                          struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   OUT_WFI5(ring);

   ASSERT_ALIGNED(struct fd6_primitives_sample, start[0], 32);

   OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS, 2);
   primitives_reloc(ring, aq, start[0]);

   fd6_event_write<CHIP>(batch->ctx, ring, FD_WRITE_PRIMITIVE_COUNTS);
}
648 
/* Accumulate one stream's emitted-primitive delta into the result.
 * NOTE(review): "accumultate" is a pre-existing typo in the name; kept
 * since sibling code calls it by this spelling.
 */
static void
accumultate_primitives_emitted(struct fd_acc_query *aq,
                               struct fd_ringbuffer *ring,
                               int idx)
{
   /* result += stop - start: */
   OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
   OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C | 0x80000000);
   primitives_reloc(ring, aq, result.emitted);
   primitives_reloc(ring, aq, result.emitted);
   primitives_reloc(ring, aq, stop[idx].emitted);
   primitives_reloc(ring, aq, start[idx].emitted);
}
662 
/* Accumulate one stream's generated-primitive delta into the result.
 * NOTE(review): "accumultate" is a pre-existing typo in the name; kept
 * since sibling code calls it by this spelling.
 */
static void
accumultate_primitives_generated(struct fd_acc_query *aq,
                                 struct fd_ringbuffer *ring,
                                 int idx)
{
   /* result += stop - start: */
   OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
   OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C | 0x80000000);
   primitives_reloc(ring, aq, result.generated);
   primitives_reloc(ring, aq, result.generated);
   primitives_reloc(ring, aq, stop[idx].generated);
   primitives_reloc(ring, aq, start[idx].generated);
}
676 
/* End a primitives-emitted interval: dump the per-stream counters into the
 * "stop" slots and accumulate the deltas that this query type needs.
 */
template <chip CHIP>
static void
primitives_emitted_pause(struct fd_acc_query *aq,
                         struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   OUT_WFI5(ring);

   ASSERT_ALIGNED(struct fd6_primitives_sample, stop[0], 32);

   OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS, 2);
   primitives_reloc(ring, aq, stop[0]);

   fd6_event_write<CHIP>(batch->ctx, ring, FD_WRITE_PRIMITIVE_COUNTS);
   fd6_event_write<CHIP>(batch->ctx, ring, FD_CACHE_CLEAN);

   if (aq->provider->query_type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      /* Need results from all channels: */
      for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
         accumultate_primitives_emitted(aq, ring, i);
         accumultate_primitives_generated(aq, ring, i);
      }
   } else {
      accumultate_primitives_emitted(aq, ring, aq->base.index);
      /* Only need primitives generated counts for the overflow queries: */
      if (aq->provider->query_type == PIPE_QUERY_SO_OVERFLOW_PREDICATE)
         accumultate_primitives_generated(aq, ring, aq->base.index);
   }
}
707 
708 static void
primitives_emitted_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)709 primitives_emitted_result(struct fd_acc_query *aq,
710                           struct fd_acc_query_sample *s,
711                           union pipe_query_result *result)
712 {
713    struct fd6_primitives_sample *ps = fd6_primitives_sample(s);
714 
715    log_primitives_sample(ps);
716 
717    result->u64 = ps->result.emitted;
718 }
719 
720 static void
primitives_emitted_result_resource(struct fd_acc_query * aq,struct fd_ringbuffer * ring,enum pipe_query_value_type result_type,int index,struct fd_resource * dst,unsigned offset)721 primitives_emitted_result_resource(struct fd_acc_query *aq,
722                                    struct fd_ringbuffer *ring,
723                                    enum pipe_query_value_type result_type,
724                                    int index, struct fd_resource *dst,
725                                    unsigned offset)
726 {
727    copy_result(ring, result_type, dst, offset, fd_resource(aq->prsc),
728                offsetof(struct fd6_primitives_sample, result.emitted));
729 }
730 
731 static void
so_overflow_predicate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)732 so_overflow_predicate_result(struct fd_acc_query *aq,
733                              struct fd_acc_query_sample *s,
734                              union pipe_query_result *result)
735 {
736    struct fd6_primitives_sample *ps = fd6_primitives_sample(s);
737 
738    log_primitives_sample(ps);
739 
740    result->b = ps->result.emitted != ps->result.generated;
741 }
742 
/* GPU-side readback of the overflow predicate: compute (generated -
 * emitted) into dst, then normalize it to 0/1 with a conditional write.
 */
static void
so_overflow_predicate_result_resource(struct fd_acc_query *aq,
                                      struct fd_ringbuffer *ring,
                                      enum pipe_query_value_type result_type,
                                      int index, struct fd_resource *dst,
                                      unsigned offset)
{
   fd_ringbuffer_attach_bo(ring, dst->bo);
   fd_ringbuffer_attach_bo(ring, fd_resource(aq->prsc)->bo);

   /* result = generated - emitted: */
   OUT_PKT7(ring, CP_MEM_TO_MEM, 7);
   OUT_RING(ring, CP_MEM_TO_MEM_0_NEG_B |
            COND(result_type >= PIPE_QUERY_TYPE_I64, CP_MEM_TO_MEM_0_DOUBLE));
   OUT_RELOC(ring, dst->bo, offset, 0, 0);
   primitives_reloc(ring, aq, result.generated);
   primitives_reloc(ring, aq, result.emitted);

   /* This is a bit awkward, but glcts expects the result to be 1 or 0
    * rather than non-zero vs zero:
    */
   OUT_PKT7(ring, CP_COND_WRITE5, 9);
   OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_NE) |
                  CP_COND_WRITE5_0_POLL(POLL_MEMORY) |
                  CP_COND_WRITE5_0_WRITE_MEMORY);
   OUT_RELOC(ring, dst->bo, offset, 0, 0);    /* POLL_ADDR_LO/HI */
   OUT_RING(ring, CP_COND_WRITE5_3_REF(0));
   OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
   OUT_RELOC(ring, dst->bo, offset, 0, 0);    /* WRITE_ADDR_LO/HI */
   OUT_RING(ring, 1);
   OUT_RING(ring, 0);
}
775 
/* PIPE_QUERY_PRIMITIVES_EMITTED: reports result.emitted from the
 * primitives sample (see primitives_emitted_result()).
 */
template <chip CHIP>
static const struct fd_acc_sample_provider primitives_emitted = {
   .query_type = PIPE_QUERY_PRIMITIVES_EMITTED,
   .size = sizeof(struct fd6_primitives_sample),
   .resume = primitives_emitted_resume<CHIP>,
   .pause = primitives_emitted_pause<CHIP>,
   .result = primitives_emitted_result,
   .result_resource = primitives_emitted_result_resource,
};
785 
/* PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: same sampling as
 * primitives_emitted, but the result callbacks compare emitted vs
 * generated to produce a boolean overflow predicate.
 */
template <chip CHIP>
static const struct fd_acc_sample_provider so_overflow_any_predicate = {
   .query_type = PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE,
   .size = sizeof(struct fd6_primitives_sample),
   .resume = primitives_emitted_resume<CHIP>,
   .pause = primitives_emitted_pause<CHIP>,
   .result = so_overflow_predicate_result,
   .result_resource = so_overflow_predicate_result_resource,
};
795 
/* PIPE_QUERY_SO_OVERFLOW_PREDICATE: identical plumbing to
 * so_overflow_any_predicate, differing only in query_type.
 */
template <chip CHIP>
static const struct fd_acc_sample_provider so_overflow_predicate = {
   .query_type = PIPE_QUERY_SO_OVERFLOW_PREDICATE,
   .size = sizeof(struct fd6_primitives_sample),
   .resume = primitives_emitted_resume<CHIP>,
   .pause = primitives_emitted_pause<CHIP>,
   .result = so_overflow_predicate_result,
   .result_resource = so_overflow_predicate_result_resource,
};
805 
806 /*
807  * Performance Counter (batch) queries:
808  *
809  * Only one of these is active at a time, per design of the gallium
810  * batch_query API design.  On perfcntr query tracks N query_types,
811  * each of which has a 'fd_batch_query_entry' that maps it back to
812  * the associated group and counter.
813  */
814 
/* Maps one query_type of a batch query back to the perfcntr group and
 * countable it came from:
 */
struct fd_batch_query_entry {
   uint8_t gid; /* group-id */
   uint8_t cid; /* countable-id within the group */
};
819 
/* Per-query state for a gallium batch query, stored in aq->query_data;
 * one entry per requested query_type:
 */
struct fd_batch_query_data {
   struct fd_screen *screen;
   unsigned num_query_entries;
   struct fd_batch_query_entry query_entries[];
};
825 
/* Program the selected countables into hw counter registers and snapshot
 * the starting counter values into the query's sample buffer.
 */
static void
perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_batch_query_data *data = (struct fd_batch_query_data *)aq->query_data;
   struct fd_screen *screen = data->screen;
   struct fd_ringbuffer *ring = batch->draw;

   /* Counts how many counters in each group are already claimed, so
    * successive entries in the same group use distinct counters:
    */
   unsigned counters_per_group[screen->num_perfcntr_groups];
   memset(counters_per_group, 0, sizeof(counters_per_group));

   OUT_WFI5(ring);

   /* configure performance counters for the requested queries: */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      struct fd_batch_query_entry *entry = &data->query_entries[i];
      const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
      unsigned counter_idx = counters_per_group[entry->gid]++;

      /* fd6_create_batch_query() already rejected over-subscription: */
      assert(counter_idx < g->num_counters);

      OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
      OUT_RING(ring, g->countables[entry->cid].selector);
   }

   /* Re-walk the entries from the start so each entry maps to the same
    * counter_idx as in the loop above:
    */
   memset(counters_per_group, 0, sizeof(counters_per_group));

   /* and snapshot the start values */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      struct fd_batch_query_entry *entry = &data->query_entries[i];
      const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
      unsigned counter_idx = counters_per_group[entry->gid]++;
      const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

      OUT_PKT7(ring, CP_REG_TO_MEM, 3);
      OUT_RING(ring, CP_REG_TO_MEM_0_64B |
                        CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
      OUT_RELOC(ring, query_sample_idx(aq, i, start));
   }
}
865 
/* Snapshot the ending counter values and accumulate (stop - start) into
 * each entry's running result, all on the GPU.
 */
static void
perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_batch_query_data *data = (struct fd_batch_query_data *)aq->query_data;
   struct fd_screen *screen = data->screen;
   struct fd_ringbuffer *ring = batch->draw;

   /* Same counter-assignment bookkeeping as perfcntr_resume(), so each
    * entry reads back the counter it was programmed with:
    */
   unsigned counters_per_group[screen->num_perfcntr_groups];
   memset(counters_per_group, 0, sizeof(counters_per_group));

   OUT_WFI5(ring);

   /* TODO do we need to bother to turn anything off? */

   /* snapshot the end values: */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      struct fd_batch_query_entry *entry = &data->query_entries[i];
      const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
      unsigned counter_idx = counters_per_group[entry->gid]++;
      const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

      OUT_PKT7(ring, CP_REG_TO_MEM, 3);
      OUT_RING(ring, CP_REG_TO_MEM_0_64B |
                        CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
      OUT_RELOC(ring, query_sample_idx(aq, i, stop));
   }

   /* and compute the result: */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      /* result += stop - start: */
      OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
      OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
      OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */
      OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */
      OUT_RELOC(ring, query_sample_idx(aq, i, stop));   /* srcB */
      OUT_RELOC(ring, query_sample_idx(aq, i, start));  /* srcC (negated) */
   }
}
904 
905 static void
perfcntr_accumulate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)906 perfcntr_accumulate_result(struct fd_acc_query *aq,
907                            struct fd_acc_query_sample *s,
908                            union pipe_query_result *result)
909 {
910    struct fd_batch_query_data *data =
911          (struct fd_batch_query_data *)aq->query_data;
912    struct fd6_query_sample *sp = fd6_query_sample(s);
913 
914    for (unsigned i = 0; i < data->num_query_entries; i++) {
915       result->batch[i].u64 = sp[i].result;
916    }
917 }
918 
/* Provider for batch (perfcntr) queries; .always because counters tick
 * regardless of draw state.  Note: no .size — the sample buffer size is
 * set per-query in fd6_create_batch_query().
 */
static const struct fd_acc_sample_provider perfcntr = {
   .query_type = FD_QUERY_FIRST_PERFCNTR,
   .always = true,
   .resume = perfcntr_resume,
   .pause = perfcntr_pause,
   .result = perfcntr_accumulate_result,
};
926 
927 static struct pipe_query *
fd6_create_batch_query(struct pipe_context * pctx,unsigned num_queries,unsigned * query_types)928 fd6_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
929                        unsigned *query_types)
930 {
931    struct fd_context *ctx = fd_context(pctx);
932    struct fd_screen *screen = ctx->screen;
933    struct fd_query *q;
934    struct fd_acc_query *aq;
935    struct fd_batch_query_data *data;
936 
937    data = CALLOC_VARIANT_LENGTH_STRUCT(
938       fd_batch_query_data, num_queries * sizeof(data->query_entries[0]));
939 
940    data->screen = screen;
941    data->num_query_entries = num_queries;
942 
943    /* validate the requested query_types and ensure we don't try
944     * to request more query_types of a given group than we have
945     * counters:
946     */
947    unsigned counters_per_group[screen->num_perfcntr_groups];
948    memset(counters_per_group, 0, sizeof(counters_per_group));
949 
950    for (unsigned i = 0; i < num_queries; i++) {
951       unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
952 
953       /* verify valid query_type, ie. is it actually a perfcntr? */
954       if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
955           (idx >= screen->num_perfcntr_queries)) {
956          mesa_loge("invalid batch query query_type: %u", query_types[i]);
957          goto error;
958       }
959 
960       struct fd_batch_query_entry *entry = &data->query_entries[i];
961       struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
962 
963       entry->gid = pq->group_id;
964 
965       /* the perfcntr_queries[] table flattens all the countables
966        * for each group in series, ie:
967        *
968        *   (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
969        *
970        * So to find the countable index just step back through the
971        * table to find the first entry with the same group-id.
972        */
973       while (pq > screen->perfcntr_queries) {
974          pq--;
975          if (pq->group_id == entry->gid)
976             entry->cid++;
977       }
978 
979       if (counters_per_group[entry->gid] >=
980           screen->perfcntr_groups[entry->gid].num_counters) {
981          mesa_loge("too many counters for group %u", entry->gid);
982          goto error;
983       }
984 
985       counters_per_group[entry->gid]++;
986    }
987 
988    q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
989    aq = fd_acc_query(q);
990 
991    /* sample buffer size is based on # of queries: */
992    aq->size = num_queries * sizeof(struct fd6_query_sample);
993    aq->query_data = data;
994 
995    return (struct pipe_query *)q;
996 
997 error:
998    free(data);
999    return NULL;
1000 }
1001 
/* Hook up query entry points and register all a6xx accumulated-query
 * providers on context creation.
 */
template <chip CHIP>
void
fd6_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->create_query = fd_acc_create_query;
   ctx->query_update_batch = fd_acc_query_update_batch;

   ctx->record_timestamp = record_timestamp<CHIP>;
   ctx->ts_to_ns = ticks_to_ns;

   pctx->create_batch_query = fd6_create_batch_query;

   fd_acc_query_register_provider(pctx, &occlusion_counter<CHIP>);
   fd_acc_query_register_provider(pctx, &occlusion_predicate<CHIP>);
   fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative<CHIP>);

   fd_acc_query_register_provider(pctx, &time_elapsed<CHIP>);
   fd_acc_query_register_provider(pctx, &timestamp<CHIP>);

   fd_acc_query_register_provider(pctx, &primitives_generated<CHIP>);
   fd_acc_query_register_provider(pctx, &pipeline_statistics_single<CHIP>);

   fd_acc_query_register_provider(pctx, &primitives_emitted<CHIP>);
   fd_acc_query_register_provider(pctx, &so_overflow_any_predicate<CHIP>);
   fd_acc_query_register_provider(pctx, &so_overflow_predicate<CHIP>);
}
/* Instantiate per-chip variants of fd6_query_context_init: */
FD_GENX(fd6_query_context_init);
1031