• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org>
3  * Copyright © 2018 Google, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  * Authors:
25  *    Rob Clark <robclark@freedesktop.org>
26  */
27 
28 /* NOTE: see https://github.com/freedreno/freedreno/wiki/A5xx-Queries */
29 
30 #include "freedreno_query_acc.h"
31 #include "freedreno_resource.h"
32 
33 #include "fd6_context.h"
34 #include "fd6_emit.h"
35 #include "fd6_query.h"
36 
37 struct PACKED fd6_query_sample {
38    uint64_t start;
39    uint64_t result;
40    uint64_t stop;
41 };
42 
/* bo + offset of a single field of one element of an array of
 * fd6_query_sample, expanding to the (bo, offset, hi, shift) argument
 * list expected by OUT_RELOC().
 *
 * Note: 'idx' is parenthesized so that expression arguments (eg. 'i + 1')
 * bind correctly against the '*' in the expansion.
 */
#define query_sample_idx(aq, idx, field)                                       \
   fd_resource((aq)->prsc)->bo,                                                \
      ((idx) * sizeof(struct fd6_query_sample)) +                              \
         offsetof(struct fd6_query_sample, field),                             \
      0, 0

/* offset of a single field of fd6_query_sample: */
#define query_sample(aq, field) query_sample_idx(aq, 0, field)
52 
53 /*
54  * Occlusion Query:
55  *
56  * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
57  * interpret results
58  */
59 
60 static void
occlusion_resume(struct fd_acc_query * aq,struct fd_batch * batch)61 occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
62 {
63    struct fd_ringbuffer *ring = batch->draw;
64 
65    OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
66    OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
67 
68    OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
69    OUT_RELOC(ring, query_sample(aq, start));
70 
71    fd6_event_write(batch, ring, ZPASS_DONE, false);
72 
73    fd6_context(batch->ctx)->samples_passed_queries++;
74 }
75 
76 static void
occlusion_pause(struct fd_acc_query * aq,struct fd_batch * batch)77 occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
78 {
79    struct fd_ringbuffer *ring = batch->draw;
80 
81    OUT_PKT7(ring, CP_MEM_WRITE, 4);
82    OUT_RELOC(ring, query_sample(aq, stop));
83    OUT_RING(ring, 0xffffffff);
84    OUT_RING(ring, 0xffffffff);
85 
86    OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
87 
88    OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
89    OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
90 
91    OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
92    OUT_RELOC(ring, query_sample(aq, stop));
93 
94    fd6_event_write(batch, ring, ZPASS_DONE, false);
95 
96    /* To avoid stalling in the draw buffer, emit code the code to compute the
97     * counter delta in the epilogue ring.
98     */
99    struct fd_ringbuffer *epilogue = fd_batch_get_epilogue(batch);
100 
101    OUT_PKT7(epilogue, CP_WAIT_REG_MEM, 6);
102    OUT_RING(epilogue, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_NE) |
103                       CP_WAIT_REG_MEM_0_POLL_MEMORY);
104    OUT_RELOC(epilogue, query_sample(aq, stop));
105    OUT_RING(epilogue, CP_WAIT_REG_MEM_3_REF(0xffffffff));
106    OUT_RING(epilogue, CP_WAIT_REG_MEM_4_MASK(0xffffffff));
107    OUT_RING(epilogue, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));
108 
109    /* result += stop - start: */
110    OUT_PKT7(epilogue, CP_MEM_TO_MEM, 9);
111    OUT_RING(epilogue, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
112    OUT_RELOC(epilogue, query_sample(aq, result)); /* dst */
113    OUT_RELOC(epilogue, query_sample(aq, result)); /* srcA */
114    OUT_RELOC(epilogue, query_sample(aq, stop));   /* srcB */
115    OUT_RELOC(epilogue, query_sample(aq, start));  /* srcC */
116 
117    fd6_context(batch->ctx)->samples_passed_queries--;
118 }
119 
120 static void
occlusion_counter_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)121 occlusion_counter_result(struct fd_acc_query *aq, void *buf,
122                          union pipe_query_result *result)
123 {
124    struct fd6_query_sample *sp = buf;
125    result->u64 = sp->result;
126 }
127 
128 static void
occlusion_predicate_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)129 occlusion_predicate_result(struct fd_acc_query *aq, void *buf,
130                            union pipe_query_result *result)
131 {
132    struct fd6_query_sample *sp = buf;
133    result->b = !!sp->result;
134 }
135 
136 static const struct fd_acc_sample_provider occlusion_counter = {
137    .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
138    .size = sizeof(struct fd6_query_sample),
139    .resume = occlusion_resume,
140    .pause = occlusion_pause,
141    .result = occlusion_counter_result,
142 };
143 
144 static const struct fd_acc_sample_provider occlusion_predicate = {
145    .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
146    .size = sizeof(struct fd6_query_sample),
147    .resume = occlusion_resume,
148    .pause = occlusion_pause,
149    .result = occlusion_predicate_result,
150 };
151 
152 static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
153    .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
154    .size = sizeof(struct fd6_query_sample),
155    .resume = occlusion_resume,
156    .pause = occlusion_pause,
157    .result = occlusion_predicate_result,
158 };
159 
160 /*
161  * Timestamp Queries:
162  */
163 
164 static void
timestamp_resume(struct fd_acc_query * aq,struct fd_batch * batch)165 timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch)
166 {
167    struct fd_ringbuffer *ring = batch->draw;
168 
169    OUT_PKT7(ring, CP_EVENT_WRITE, 4);
170    OUT_RING(ring,
171             CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
172    OUT_RELOC(ring, query_sample(aq, start));
173    OUT_RING(ring, 0x00000000);
174 
175    fd_reset_wfi(batch);
176 }
177 
178 static void
time_elapsed_pause(struct fd_acc_query * aq,struct fd_batch * batch)179 time_elapsed_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
180 {
181    struct fd_ringbuffer *ring = batch->draw;
182 
183    OUT_PKT7(ring, CP_EVENT_WRITE, 4);
184    OUT_RING(ring,
185             CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
186    OUT_RELOC(ring, query_sample(aq, stop));
187    OUT_RING(ring, 0x00000000);
188 
189    fd_reset_wfi(batch);
190    fd_wfi(batch, ring);
191 
192    /* result += stop - start: */
193    OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
194    OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
195    OUT_RELOC(ring, query_sample(aq, result)); /* dst */
196    OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
197    OUT_RELOC(ring, query_sample(aq, stop));   /* srcB */
198    OUT_RELOC(ring, query_sample(aq, start));  /* srcC */
199 }
200 
201 static void
timestamp_pause(struct fd_acc_query * aq,struct fd_batch * batch)202 timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch)
203 {
204    /* We captured a timestamp in timestamp_resume(), nothing to do here. */
205 }
206 
207 /* timestamp logging for u_trace: */
208 static void
record_timestamp(struct fd_ringbuffer * ring,struct fd_bo * bo,unsigned offset)209 record_timestamp(struct fd_ringbuffer *ring, struct fd_bo *bo, unsigned offset)
210 {
211    OUT_PKT7(ring, CP_EVENT_WRITE, 4);
212    OUT_RING(ring,
213             CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
214    OUT_RELOC(ring, bo, offset, 0, 0);
215    OUT_RING(ring, 0x00000000);
216 }
217 
218 static uint64_t
ticks_to_ns(uint64_t ts)219 ticks_to_ns(uint64_t ts)
220 {
221    /* This is based on the 19.2MHz always-on rbbm timer.
222     *
223     * TODO we should probably query this value from kernel..
224     */
225    return ts * (1000000000 / 19200000);
226 }
227 
228 static void
time_elapsed_accumulate_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)229 time_elapsed_accumulate_result(struct fd_acc_query *aq, void *buf,
230                                union pipe_query_result *result)
231 {
232    struct fd6_query_sample *sp = buf;
233    result->u64 = ticks_to_ns(sp->result);
234 }
235 
236 static void
timestamp_accumulate_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)237 timestamp_accumulate_result(struct fd_acc_query *aq, void *buf,
238                             union pipe_query_result *result)
239 {
240    struct fd6_query_sample *sp = buf;
241    result->u64 = ticks_to_ns(sp->start);
242 }
243 
244 static const struct fd_acc_sample_provider time_elapsed = {
245    .query_type = PIPE_QUERY_TIME_ELAPSED,
246    .always = true,
247    .size = sizeof(struct fd6_query_sample),
248    .resume = timestamp_resume,
249    .pause = time_elapsed_pause,
250    .result = time_elapsed_accumulate_result,
251 };
252 
253 /* NOTE: timestamp query isn't going to give terribly sensible results
254  * on a tiler.  But it is needed by qapitrace profile heatmap.  If you
255  * add in a binning pass, the results get even more non-sensical.  So
256  * we just return the timestamp on the last tile and hope that is
257  * kind of good enough.
258  */
259 
260 static const struct fd_acc_sample_provider timestamp = {
261    .query_type = PIPE_QUERY_TIMESTAMP,
262    .always = true,
263    .size = sizeof(struct fd6_query_sample),
264    .resume = timestamp_resume,
265    .pause = timestamp_pause,
266    .result = timestamp_accumulate_result,
267 };
268 
269 struct PACKED fd6_primitives_sample {
270    struct {
271       uint64_t emitted, generated;
272    } start[4], stop[4], result;
273 
274    uint64_t prim_start[16], prim_stop[16], prim_emitted;
275 };
276 
277 #define primitives_relocw(ring, aq, field)                                     \
278    OUT_RELOC(ring, fd_resource((aq)->prsc)->bo,                                \
279              offsetof(struct fd6_primitives_sample, field), 0, 0);
280 #define primitives_reloc(ring, aq, field)                                      \
281    OUT_RELOC(ring, fd_resource((aq)->prsc)->bo,                                \
282              offsetof(struct fd6_primitives_sample, field), 0, 0);
283 
284 #ifdef DEBUG_COUNTERS
285 static const unsigned counter_count = 10;
286 static const unsigned counter_base = REG_A6XX_RBBM_PRIMCTR_0_LO;
287 
288 static void
log_counters(struct fd6_primitives_sample * ps)289 log_counters(struct fd6_primitives_sample *ps)
290 {
291    const char *labels[] = {
292       "vs_vertices_in",    "vs_primitives_out",
293       "hs_vertices_in",    "hs_patches_out",
294       "ds_vertices_in",    "ds_primitives_out",
295       "gs_primitives_in",  "gs_primitives_out",
296       "ras_primitives_in", "x",
297    };
298 
299    mesa_logd("  counter\t\tstart\t\t\tstop\t\t\tdiff");
300    for (int i = 0; i < ARRAY_SIZE(labels); i++) {
301       int register_idx = i + (counter_base - REG_A6XX_RBBM_PRIMCTR_0_LO) / 2;
302       mesa_logd("  RBBM_PRIMCTR_%d\t0x%016" PRIx64 "\t0x%016" PRIx64 "\t%" PRIi64
303              "\t%s",
304              register_idx, ps->prim_start[i], ps->prim_stop[i],
305              ps->prim_stop[i] - ps->prim_start[i], labels[register_idx]);
306    }
307 
308    mesa_logd("  so counts");
309    for (int i = 0; i < ARRAY_SIZE(ps->start); i++) {
310       mesa_logd("  CHANNEL %d emitted\t0x%016" PRIx64 "\t0x%016" PRIx64
311              "\t%" PRIi64,
312              i, ps->start[i].generated, ps->stop[i].generated,
313              ps->stop[i].generated - ps->start[i].generated);
314       mesa_logd("  CHANNEL %d generated\t0x%016" PRIx64 "\t0x%016" PRIx64
315              "\t%" PRIi64,
316              i, ps->start[i].emitted, ps->stop[i].emitted,
317              ps->stop[i].emitted - ps->start[i].emitted);
318    }
319 
320    mesa_logd("generated %" PRIu64 ", emitted %" PRIu64, ps->result.generated,
321           ps->result.emitted);
322 }
323 
324 #else
325 
326 static const unsigned counter_count = 1;
327 static const unsigned counter_base = REG_A6XX_RBBM_PRIMCTR_8_LO;
328 
329 static void
log_counters(struct fd6_primitives_sample * ps)330 log_counters(struct fd6_primitives_sample *ps)
331 {
332 }
333 
334 #endif
335 
336 static void
primitives_generated_resume(struct fd_acc_query * aq,struct fd_batch * batch)337 primitives_generated_resume(struct fd_acc_query *aq,
338                             struct fd_batch *batch) assert_dt
339 {
340    struct fd_ringbuffer *ring = batch->draw;
341 
342    fd_wfi(batch, ring);
343 
344    OUT_PKT7(ring, CP_REG_TO_MEM, 3);
345    OUT_RING(ring, CP_REG_TO_MEM_0_64B | CP_REG_TO_MEM_0_CNT(counter_count * 2) |
346                      CP_REG_TO_MEM_0_REG(counter_base));
347    primitives_relocw(ring, aq, prim_start);
348 
349    fd6_event_write(batch, ring, START_PRIMITIVE_CTRS, false);
350 }
351 
352 static void
primitives_generated_pause(struct fd_acc_query * aq,struct fd_batch * batch)353 primitives_generated_pause(struct fd_acc_query *aq,
354                            struct fd_batch *batch) assert_dt
355 {
356    struct fd_ringbuffer *ring = batch->draw;
357 
358    fd_wfi(batch, ring);
359 
360    /* snapshot the end values: */
361    OUT_PKT7(ring, CP_REG_TO_MEM, 3);
362    OUT_RING(ring, CP_REG_TO_MEM_0_64B | CP_REG_TO_MEM_0_CNT(counter_count * 2) |
363                      CP_REG_TO_MEM_0_REG(counter_base));
364    primitives_relocw(ring, aq, prim_stop);
365 
366    fd6_event_write(batch, ring, STOP_PRIMITIVE_CTRS, false);
367 
368    /* result += stop - start: */
369    OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
370    OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C | 0x40000000);
371    primitives_relocw(ring, aq, result.generated);
372    primitives_reloc(ring, aq, prim_emitted);
373    primitives_reloc(ring, aq,
374                     prim_stop[(REG_A6XX_RBBM_PRIMCTR_8_LO - counter_base) / 2])
375       primitives_reloc(
376          ring, aq, prim_start[(REG_A6XX_RBBM_PRIMCTR_8_LO - counter_base) / 2]);
377 }
378 
379 static void
primitives_generated_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)380 primitives_generated_result(struct fd_acc_query *aq, void *buf,
381                             union pipe_query_result *result)
382 {
383    struct fd6_primitives_sample *ps = buf;
384 
385    log_counters(ps);
386 
387    result->u64 = ps->result.generated;
388 }
389 
390 static const struct fd_acc_sample_provider primitives_generated = {
391    .query_type = PIPE_QUERY_PRIMITIVES_GENERATED,
392    .size = sizeof(struct fd6_primitives_sample),
393    .resume = primitives_generated_resume,
394    .pause = primitives_generated_pause,
395    .result = primitives_generated_result,
396 };
397 
398 static void
primitives_emitted_resume(struct fd_acc_query * aq,struct fd_batch * batch)399 primitives_emitted_resume(struct fd_acc_query *aq,
400                           struct fd_batch *batch) assert_dt
401 {
402    struct fd_ringbuffer *ring = batch->draw;
403 
404    fd_wfi(batch, ring);
405    OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS, 2);
406    primitives_relocw(ring, aq, start[0]);
407 
408    fd6_event_write(batch, ring, WRITE_PRIMITIVE_COUNTS, false);
409 }
410 
411 static void
primitives_emitted_pause(struct fd_acc_query * aq,struct fd_batch * batch)412 primitives_emitted_pause(struct fd_acc_query *aq,
413                          struct fd_batch *batch) assert_dt
414 {
415    struct fd_ringbuffer *ring = batch->draw;
416 
417    fd_wfi(batch, ring);
418 
419    OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS, 2);
420    primitives_relocw(ring, aq, stop[0]);
421    fd6_event_write(batch, ring, WRITE_PRIMITIVE_COUNTS, false);
422 
423    fd6_event_write(batch, batch->draw, CACHE_FLUSH_TS, true);
424 
425    /* result += stop - start: */
426    OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
427    OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C | 0x80000000);
428    primitives_relocw(ring, aq, result.emitted);
429    primitives_reloc(ring, aq, result.emitted);
430    primitives_reloc(ring, aq, stop[aq->base.index].emitted);
431    primitives_reloc(ring, aq, start[aq->base.index].emitted);
432 }
433 
434 static void
primitives_emitted_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)435 primitives_emitted_result(struct fd_acc_query *aq, void *buf,
436                           union pipe_query_result *result)
437 {
438    struct fd6_primitives_sample *ps = buf;
439 
440    log_counters(ps);
441 
442    result->u64 = ps->result.emitted;
443 }
444 
445 static const struct fd_acc_sample_provider primitives_emitted = {
446    .query_type = PIPE_QUERY_PRIMITIVES_EMITTED,
447    .size = sizeof(struct fd6_primitives_sample),
448    .resume = primitives_emitted_resume,
449    .pause = primitives_emitted_pause,
450    .result = primitives_emitted_result,
451 };
452 
/*
 * Performance Counter (batch) queries:
 *
 * Only one of these is active at a time, per design of the gallium
 * batch_query API.  One perfcntr query tracks N query_types, each of
 * which has a 'fd_batch_query_entry' that maps it back to the
 * associated group and counter.
 */

/* Maps one requested query_type back to its perfcntr group/countable: */
struct fd_batch_query_entry {
   uint8_t gid; /* group-id */
   uint8_t cid; /* countable-id within the group */
};

/* Per-query payload: one entry per tracked query_type. */
struct fd_batch_query_data {
   struct fd_screen *screen;
   unsigned num_query_entries;
   struct fd_batch_query_entry query_entries[];
};
472 
473 static void
perfcntr_resume(struct fd_acc_query * aq,struct fd_batch * batch)474 perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
475 {
476    struct fd_batch_query_data *data = aq->query_data;
477    struct fd_screen *screen = data->screen;
478    struct fd_ringbuffer *ring = batch->draw;
479 
480    unsigned counters_per_group[screen->num_perfcntr_groups];
481    memset(counters_per_group, 0, sizeof(counters_per_group));
482 
483    fd_wfi(batch, ring);
484 
485    /* configure performance counters for the requested queries: */
486    for (unsigned i = 0; i < data->num_query_entries; i++) {
487       struct fd_batch_query_entry *entry = &data->query_entries[i];
488       const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
489       unsigned counter_idx = counters_per_group[entry->gid]++;
490 
491       assert(counter_idx < g->num_counters);
492 
493       OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
494       OUT_RING(ring, g->countables[entry->cid].selector);
495    }
496 
497    memset(counters_per_group, 0, sizeof(counters_per_group));
498 
499    /* and snapshot the start values */
500    for (unsigned i = 0; i < data->num_query_entries; i++) {
501       struct fd_batch_query_entry *entry = &data->query_entries[i];
502       const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
503       unsigned counter_idx = counters_per_group[entry->gid]++;
504       const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
505 
506       OUT_PKT7(ring, CP_REG_TO_MEM, 3);
507       OUT_RING(ring, CP_REG_TO_MEM_0_64B |
508                         CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
509       OUT_RELOC(ring, query_sample_idx(aq, i, start));
510    }
511 }
512 
513 static void
perfcntr_pause(struct fd_acc_query * aq,struct fd_batch * batch)514 perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
515 {
516    struct fd_batch_query_data *data = aq->query_data;
517    struct fd_screen *screen = data->screen;
518    struct fd_ringbuffer *ring = batch->draw;
519 
520    unsigned counters_per_group[screen->num_perfcntr_groups];
521    memset(counters_per_group, 0, sizeof(counters_per_group));
522 
523    fd_wfi(batch, ring);
524 
525    /* TODO do we need to bother to turn anything off? */
526 
527    /* snapshot the end values: */
528    for (unsigned i = 0; i < data->num_query_entries; i++) {
529       struct fd_batch_query_entry *entry = &data->query_entries[i];
530       const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
531       unsigned counter_idx = counters_per_group[entry->gid]++;
532       const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
533 
534       OUT_PKT7(ring, CP_REG_TO_MEM, 3);
535       OUT_RING(ring, CP_REG_TO_MEM_0_64B |
536                         CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
537       OUT_RELOC(ring, query_sample_idx(aq, i, stop));
538    }
539 
540    /* and compute the result: */
541    for (unsigned i = 0; i < data->num_query_entries; i++) {
542       /* result += stop - start: */
543       OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
544       OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
545       OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */
546       OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */
547       OUT_RELOC(ring, query_sample_idx(aq, i, stop));   /* srcB */
548       OUT_RELOC(ring, query_sample_idx(aq, i, start));  /* srcC */
549    }
550 }
551 
552 static void
perfcntr_accumulate_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)553 perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
554                            union pipe_query_result *result)
555 {
556    struct fd_batch_query_data *data = aq->query_data;
557    struct fd6_query_sample *sp = buf;
558 
559    for (unsigned i = 0; i < data->num_query_entries; i++) {
560       result->batch[i].u64 = sp[i].result;
561    }
562 }
563 
564 static const struct fd_acc_sample_provider perfcntr = {
565    .query_type = FD_QUERY_FIRST_PERFCNTR,
566    .always = true,
567    .resume = perfcntr_resume,
568    .pause = perfcntr_pause,
569    .result = perfcntr_accumulate_result,
570 };
571 
572 static struct pipe_query *
fd6_create_batch_query(struct pipe_context * pctx,unsigned num_queries,unsigned * query_types)573 fd6_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
574                        unsigned *query_types)
575 {
576    struct fd_context *ctx = fd_context(pctx);
577    struct fd_screen *screen = ctx->screen;
578    struct fd_query *q;
579    struct fd_acc_query *aq;
580    struct fd_batch_query_data *data;
581 
582    data = CALLOC_VARIANT_LENGTH_STRUCT(
583       fd_batch_query_data, num_queries * sizeof(data->query_entries[0]));
584 
585    data->screen = screen;
586    data->num_query_entries = num_queries;
587 
588    /* validate the requested query_types and ensure we don't try
589     * to request more query_types of a given group than we have
590     * counters:
591     */
592    unsigned counters_per_group[screen->num_perfcntr_groups];
593    memset(counters_per_group, 0, sizeof(counters_per_group));
594 
595    for (unsigned i = 0; i < num_queries; i++) {
596       unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
597 
598       /* verify valid query_type, ie. is it actually a perfcntr? */
599       if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
600           (idx >= screen->num_perfcntr_queries)) {
601          mesa_loge("invalid batch query query_type: %u", query_types[i]);
602          goto error;
603       }
604 
605       struct fd_batch_query_entry *entry = &data->query_entries[i];
606       struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
607 
608       entry->gid = pq->group_id;
609 
610       /* the perfcntr_queries[] table flattens all the countables
611        * for each group in series, ie:
612        *
613        *   (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
614        *
615        * So to find the countable index just step back through the
616        * table to find the first entry with the same group-id.
617        */
618       while (pq > screen->perfcntr_queries) {
619          pq--;
620          if (pq->group_id == entry->gid)
621             entry->cid++;
622       }
623 
624       if (counters_per_group[entry->gid] >=
625           screen->perfcntr_groups[entry->gid].num_counters) {
626          mesa_loge("too many counters for group %u", entry->gid);
627          goto error;
628       }
629 
630       counters_per_group[entry->gid]++;
631    }
632 
633    q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
634    aq = fd_acc_query(q);
635 
636    /* sample buffer size is based on # of queries: */
637    aq->size = num_queries * sizeof(struct fd6_query_sample);
638    aq->query_data = data;
639 
640    return (struct pipe_query *)q;
641 
642 error:
643    free(data);
644    return NULL;
645 }
646 
647 void
fd6_query_context_init(struct pipe_context * pctx)648 fd6_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
649 {
650    struct fd_context *ctx = fd_context(pctx);
651 
652    ctx->create_query = fd_acc_create_query;
653    ctx->query_update_batch = fd_acc_query_update_batch;
654 
655    ctx->record_timestamp = record_timestamp;
656    ctx->ts_to_ns = ticks_to_ns;
657 
658    pctx->create_batch_query = fd6_create_batch_query;
659 
660    fd_acc_query_register_provider(pctx, &occlusion_counter);
661    fd_acc_query_register_provider(pctx, &occlusion_predicate);
662    fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);
663 
664    fd_acc_query_register_provider(pctx, &time_elapsed);
665    fd_acc_query_register_provider(pctx, &timestamp);
666 
667    fd_acc_query_register_provider(pctx, &primitives_generated);
668    fd_acc_query_register_provider(pctx, &primitives_emitted);
669 }
670