1 /*
2 * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018 Google, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 * Rob Clark <robclark@freedesktop.org>
26 */
27
28 /* NOTE: see https://github.com/freedreno/freedreno/wiki/A5xx-Queries */
29
30 #include "freedreno_query_acc.h"
31 #include "freedreno_resource.h"
32
33 #include "fd6_context.h"
34 #include "fd6_emit.h"
35 #include "fd6_format.h"
36 #include "fd6_query.h"
37
/* Layout of one HW query sample in the query buffer.  The HW snapshots a
 * counter into 'start' on resume and 'stop' on pause; the running delta
 * is folded into 'result' with CP_MEM_TO_MEM (result += stop - start).
 */
struct PACKED fd6_query_sample {
   uint64_t start;
   uint64_t result;
   uint64_t stop;
};
43
/* offset of a single field of an array of fd6_query_sample:
 *
 * Expands to the (bo, offset, shift, orval) argument tuple expected by
 * OUT_RELOC().  'idx' is parenthesized so callers may pass an expression
 * (e.g. 'i + 1') without it mis-associating with the multiplication.
 */
#define query_sample_idx(aq, idx, field)                                       \
   fd_resource((aq)->prsc)->bo,                                                \
      ((idx) * sizeof(struct fd6_query_sample)) +                              \
         offsetof(struct fd6_query_sample, field),                             \
      0, 0

/* offset of a single field of fd6_query_sample: */
#define query_sample(aq, field) query_sample_idx(aq, 0, field)
53
54 /*
55 * Occlusion Query:
56 *
57 * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
58 * interpret results
59 */
60
static void
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->draw;

   /* Enable the sample-count copy and point the copy destination at this
    * query's 'start' slot:
    */
   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
   OUT_RELOC(ring, query_sample(aq, start));

   /* ZPASS_DONE triggers the HW to dump the current sample counter to the
    * address programmed above.
    */
   fd6_event_write(batch, ring, ZPASS_DONE, false);

   /* Track the number of active samples-passed queries on the context: */
   fd6_context(batch->ctx)->samples_passed_queries++;
}
76
static void
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   /* Pre-fill 'stop' with a sentinel so a CPU-side waiter could detect
    * when the HW has actually overwritten it (presumably — the write is
    * followed by the real ZPASS_DONE dump below).
    */
   OUT_PKT7(ring, CP_MEM_WRITE, 4);
   OUT_RELOC(ring, query_sample(aq, stop));
   OUT_RING(ring, 0xffffffff);
   OUT_RING(ring, 0xffffffff);

   /* Ensure the sentinel write lands before the counter dump: */
   OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
   OUT_RELOC(ring, query_sample(aq, stop));

   fd6_event_write(batch, ring, ZPASS_DONE, false);

   /* To avoid stalling in the draw buffer, emit the code to compute the
    * counter delta in the epilogue ring.
    */
   struct fd_ringbuffer *epilogue = fd_batch_get_epilogue(batch);
   fd_wfi(batch, epilogue);

   /* result += stop - start: */
   OUT_PKT7(epilogue, CP_MEM_TO_MEM, 9);
   OUT_RING(epilogue, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
   OUT_RELOC(epilogue, query_sample(aq, result)); /* dst */
   OUT_RELOC(epilogue, query_sample(aq, result)); /* srcA */
   OUT_RELOC(epilogue, query_sample(aq, stop));   /* srcB */
   OUT_RELOC(epilogue, query_sample(aq, start));  /* srcC */

   fd6_context(batch->ctx)->samples_passed_queries--;
}
113
114 static void
occlusion_counter_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)115 occlusion_counter_result(struct fd_acc_query *aq, void *buf,
116 union pipe_query_result *result)
117 {
118 struct fd6_query_sample *sp = buf;
119 result->u64 = sp->result;
120 }
121
122 static void
occlusion_predicate_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)123 occlusion_predicate_result(struct fd_acc_query *aq, void *buf,
124 union pipe_query_result *result)
125 {
126 struct fd6_query_sample *sp = buf;
127 result->b = !!sp->result;
128 }
129
/* Provider for PIPE_QUERY_OCCLUSION_COUNTER (exact sample count): */
static const struct fd_acc_sample_provider occlusion_counter = {
   .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
   .size = sizeof(struct fd6_query_sample),
   .resume = occlusion_resume,
   .pause = occlusion_pause,
   .result = occlusion_counter_result,
};
137
/* Same HW mechanism as the counter query; only result interpretation differs: */
static const struct fd_acc_sample_provider occlusion_predicate = {
   .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
   .size = sizeof(struct fd6_query_sample),
   .resume = occlusion_resume,
   .pause = occlusion_pause,
   .result = occlusion_predicate_result,
};
145
/* Conservative predicate uses the exact counter too (exact is a valid
 * conservative answer):
 */
static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
   .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
   .size = sizeof(struct fd6_query_sample),
   .resume = occlusion_resume,
   .pause = occlusion_pause,
   .result = occlusion_predicate_result,
};
153
154 /*
155 * Timestamp Queries:
156 */
157
/* Capture a GPU timestamp into the 'start' slot.  Shared by both the
 * TIMESTAMP and TIME_ELAPSED providers as their resume hook.
 */
static void
timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->draw;

   /* RB_DONE_TS event with TIMESTAMP flag writes the current GPU tick
    * count to the reloc'd address:
    */
   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
   OUT_RING(ring,
            CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
   OUT_RELOC(ring, query_sample(aq, start));
   OUT_RING(ring, 0x00000000);

   fd_reset_wfi(batch);
}
171
static void
time_elapsed_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   /* Capture the 'stop' timestamp: */
   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
   OUT_RING(ring,
            CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
   OUT_RELOC(ring, query_sample(aq, stop));
   OUT_RING(ring, 0x00000000);

   /* Wait for the timestamp write before the delta computation below: */
   fd_reset_wfi(batch);
   fd_wfi(batch, ring);

   /* result += stop - start: */
   OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
   OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
   OUT_RELOC(ring, query_sample(aq, result)); /* dst */
   OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
   OUT_RELOC(ring, query_sample(aq, stop));   /* srcB */
   OUT_RELOC(ring, query_sample(aq, start));  /* srcC */
}
194
/* TIMESTAMP only needs a single snapshot, taken at resume. */
static void
timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
   /* We captured a timestamp in timestamp_resume(), nothing to do here. */
}
200
201 /* timestamp logging for u_trace: */
/* Write a GPU timestamp to bo+offset; hooked up as ctx->record_timestamp
 * for u_trace.
 */
static void
record_timestamp(struct fd_ringbuffer *ring, struct fd_bo *bo, unsigned offset)
{
   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
   OUT_RING(ring,
            CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
   OUT_RELOC(ring, bo, offset, 0, 0);
   OUT_RING(ring, 0x00000000);
}
211
/* Convert GPU timestamp ticks to nanoseconds.
 *
 * This is based on the 19.2MHz always-on rbbm timer, i.e. each tick is
 * 1e9 / 19.2e6 = 625/12 ns (~52.083ns).  Multiply before dividing so we
 * don't truncate the per-tick factor to 52ns, which would skew every
 * reported timestamp by ~0.16%.  ts * 625 only overflows uint64_t after
 * ~48 years worth of ticks.
 *
 * TODO we should probably query this value from kernel..
 */
static uint64_t
ticks_to_ns(uint64_t ts)
{
   return ts * 625 / 12;
}
221
222 static void
time_elapsed_accumulate_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)223 time_elapsed_accumulate_result(struct fd_acc_query *aq, void *buf,
224 union pipe_query_result *result)
225 {
226 struct fd6_query_sample *sp = buf;
227 result->u64 = ticks_to_ns(sp->result);
228 }
229
230 static void
timestamp_accumulate_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)231 timestamp_accumulate_result(struct fd_acc_query *aq, void *buf,
232 union pipe_query_result *result)
233 {
234 struct fd6_query_sample *sp = buf;
235 result->u64 = ticks_to_ns(sp->start);
236 }
237
/* TIME_ELAPSED must accumulate across all tiles/batches ('always'): */
static const struct fd_acc_sample_provider time_elapsed = {
   .query_type = PIPE_QUERY_TIME_ELAPSED,
   .always = true,
   .size = sizeof(struct fd6_query_sample),
   .resume = timestamp_resume,
   .pause = time_elapsed_pause,
   .result = time_elapsed_accumulate_result,
};
246
/* NOTE: timestamp query isn't going to give terribly sensible results
 * on a tiler.  But it is needed by qapitrace profile heatmap.  If you
 * add in a binning pass, the results get even more non-sensical.  So
 * we just return the timestamp on the last tile and hope that is
 * kind of good enough.
 */

static const struct fd_acc_sample_provider timestamp = {
   .query_type = PIPE_QUERY_TIMESTAMP,
   .always = true,
   .size = sizeof(struct fd6_query_sample),
   .resume = timestamp_resume,
   .pause = timestamp_pause,
   .result = timestamp_accumulate_result,
};
262
/* Sample buffer layout for the primitives generated/emitted queries.
 * start[]/stop[] hold the per-stream VPC_SO_STREAM_COUNTS snapshots
 * (4 streamout channels), while prim_start/prim_stop hold RBBM_PRIMCTR
 * register snapshots.  'result' receives the accumulated deltas.
 */
struct PACKED fd6_primitives_sample {
   struct {
      uint64_t emitted, generated;
   } start[4], stop[4], result;

   uint64_t prim_start[16], prim_stop[16], prim_emitted;
};
270
/* Emit a reloc into the primitives-sample buffer at the offset of 'field'.
 * Both variants expand identically here (the write/read distinction is
 * presumably historical — OUT_RELOC covers both on a6xx).  NOTE the macro
 * bodies already end in ';', so call sites compile with or without a
 * trailing semicolon.
 */
#define primitives_relocw(ring, aq, field)                                     \
   OUT_RELOC(ring, fd_resource((aq)->prsc)->bo,                                \
             offsetof(struct fd6_primitives_sample, field), 0, 0);
#define primitives_reloc(ring, aq, field)                                      \
   OUT_RELOC(ring, fd_resource((aq)->prsc)->bo,                                \
             offsetof(struct fd6_primitives_sample, field), 0, 0);
277
#ifdef DEBUG_COUNTERS
/* Debug builds capture all ten RBBM primitive counters starting at
 * PRIMCTR_0 so log_counters() can dump full pipeline statistics.
 */
static const unsigned counter_count = 10;
static const unsigned counter_base = REG_A6XX_RBBM_PRIMCTR_0_LO;

/* Dump start/stop/diff for each captured primitive counter and each
 * streamout channel — debug aid only.
 */
static void
log_counters(struct fd6_primitives_sample *ps)
{
   const char *labels[] = {
      "vs_vertices_in", "vs_primitives_out",
      "hs_vertices_in", "hs_patches_out",
      "ds_vertices_in", "ds_primitives_out",
      "gs_primitives_in", "gs_primitives_out",
      "ras_primitives_in", "x",
   };

   mesa_logd(" counter\t\tstart\t\t\tstop\t\t\tdiff");
   for (int i = 0; i < ARRAY_SIZE(labels); i++) {
      /* NOTE(review): with counter_base == PRIMCTR_0, register_idx == i;
       * if counter_base were changed, labels[register_idx] could index
       * past the table — verify before reusing with a different base.
       */
      int register_idx = i + (counter_base - REG_A6XX_RBBM_PRIMCTR_0_LO) / 2;
      mesa_logd(" RBBM_PRIMCTR_%d\t0x%016" PRIx64 "\t0x%016" PRIx64 "\t%" PRIi64
                "\t%s",
                register_idx, ps->prim_start[i], ps->prim_stop[i],
                ps->prim_stop[i] - ps->prim_start[i], labels[register_idx]);
   }

   mesa_logd(" so counts");
   for (int i = 0; i < ARRAY_SIZE(ps->start); i++) {
      /* NOTE(review): the "emitted" line prints .generated and vice
       * versa — looks swapped, confirm against the intended field order.
       */
      mesa_logd(" CHANNEL %d emitted\t0x%016" PRIx64 "\t0x%016" PRIx64
                "\t%" PRIi64,
                i, ps->start[i].generated, ps->stop[i].generated,
                ps->stop[i].generated - ps->start[i].generated);
      mesa_logd(" CHANNEL %d generated\t0x%016" PRIx64 "\t0x%016" PRIx64
                "\t%" PRIi64,
                i, ps->start[i].emitted, ps->stop[i].emitted,
                ps->stop[i].emitted - ps->start[i].emitted);
   }

   mesa_logd("generated %" PRIu64 ", emitted %" PRIu64, ps->result.generated,
             ps->result.emitted);
}

#else

/* Release builds only capture the single counter the query needs
 * (PRIMCTR_8 == ras_primitives_in), and logging is a no-op.
 */
static const unsigned counter_count = 1;
static const unsigned counter_base = REG_A6XX_RBBM_PRIMCTR_8_LO;

static void
log_counters(struct fd6_primitives_sample *ps)
{
}

#endif
329
static void
primitives_generated_resume(struct fd_acc_query *aq,
                            struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   fd_wfi(batch, ring);

   /* Snapshot the starting RBBM primitive counter values (each counter is
    * a 64b LO/HI pair, hence counter_count * 2 dwords):
    */
   OUT_PKT7(ring, CP_REG_TO_MEM, 3);
   OUT_RING(ring, CP_REG_TO_MEM_0_64B | CP_REG_TO_MEM_0_CNT(counter_count * 2) |
                     CP_REG_TO_MEM_0_REG(counter_base));
   primitives_relocw(ring, aq, prim_start);

   fd6_event_write(batch, ring, START_PRIMITIVE_CTRS, false);
}
345
346 static void
primitives_generated_pause(struct fd_acc_query * aq,struct fd_batch * batch)347 primitives_generated_pause(struct fd_acc_query *aq,
348 struct fd_batch *batch) assert_dt
349 {
350 struct fd_ringbuffer *ring = batch->draw;
351
352 fd_wfi(batch, ring);
353
354 /* snapshot the end values: */
355 OUT_PKT7(ring, CP_REG_TO_MEM, 3);
356 OUT_RING(ring, CP_REG_TO_MEM_0_64B | CP_REG_TO_MEM_0_CNT(counter_count * 2) |
357 CP_REG_TO_MEM_0_REG(counter_base));
358 primitives_relocw(ring, aq, prim_stop);
359
360 fd6_event_write(batch, ring, STOP_PRIMITIVE_CTRS, false);
361
362 /* result += stop - start: */
363 OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
364 OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C | 0x40000000);
365 primitives_relocw(ring, aq, result.generated);
366 primitives_reloc(ring, aq, prim_emitted);
367 primitives_reloc(ring, aq,
368 prim_stop[(REG_A6XX_RBBM_PRIMCTR_8_LO - counter_base) / 2])
369 primitives_reloc(
370 ring, aq, prim_start[(REG_A6XX_RBBM_PRIMCTR_8_LO - counter_base) / 2]);
371 }
372
373 static void
primitives_generated_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)374 primitives_generated_result(struct fd_acc_query *aq, void *buf,
375 union pipe_query_result *result)
376 {
377 struct fd6_primitives_sample *ps = buf;
378
379 log_counters(ps);
380
381 result->u64 = ps->result.generated;
382 }
383
/* Provider for PIPE_QUERY_PRIMITIVES_GENERATED (RBBM primitive counters): */
static const struct fd_acc_sample_provider primitives_generated = {
   .query_type = PIPE_QUERY_PRIMITIVES_GENERATED,
   .size = sizeof(struct fd6_primitives_sample),
   .resume = primitives_generated_resume,
   .pause = primitives_generated_pause,
   .result = primitives_generated_result,
};
391
static void
primitives_emitted_resume(struct fd_acc_query *aq,
                          struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   fd_wfi(batch, ring);
   /* Point the streamout-counts dump at start[0] and ask the HW to write
    * the current per-stream counts:
    */
   OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS, 2);
   primitives_relocw(ring, aq, start[0]);

   fd6_event_write(batch, ring, WRITE_PRIMITIVE_COUNTS, false);
}
404
405 static void
primitives_emitted_pause(struct fd_acc_query * aq,struct fd_batch * batch)406 primitives_emitted_pause(struct fd_acc_query *aq,
407 struct fd_batch *batch) assert_dt
408 {
409 struct fd_ringbuffer *ring = batch->draw;
410
411 fd_wfi(batch, ring);
412
413 OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS, 2);
414 primitives_relocw(ring, aq, stop[0]);
415 fd6_event_write(batch, ring, WRITE_PRIMITIVE_COUNTS, false);
416
417 fd6_event_write(batch, batch->draw, CACHE_FLUSH_TS, true);
418
419 /* result += stop - start: */
420 OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
421 OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C | 0x80000000);
422 primitives_relocw(ring, aq, result.emitted);
423 primitives_reloc(ring, aq, result.emitted);
424 primitives_reloc(ring, aq, stop[aq->base.index].emitted);
425 primitives_reloc(ring, aq, start[aq->base.index].emitted);
426 }
427
428 static void
primitives_emitted_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)429 primitives_emitted_result(struct fd_acc_query *aq, void *buf,
430 union pipe_query_result *result)
431 {
432 struct fd6_primitives_sample *ps = buf;
433
434 log_counters(ps);
435
436 result->u64 = ps->result.emitted;
437 }
438
/* Provider for PIPE_QUERY_PRIMITIVES_EMITTED (streamout counts): */
static const struct fd_acc_sample_provider primitives_emitted = {
   .query_type = PIPE_QUERY_PRIMITIVES_EMITTED,
   .size = sizeof(struct fd6_primitives_sample),
   .resume = primitives_emitted_resume,
   .pause = primitives_emitted_pause,
   .result = primitives_emitted_result,
};
446
447 /*
448 * Performance Counter (batch) queries:
449 *
450 * Only one of these is active at a time, per design of the gallium
451 * batch_query API design. On perfcntr query tracks N query_types,
452 * each of which has a 'fd_batch_query_entry' that maps it back to
453 * the associated group and counter.
454 */
455
/* Maps one requested query_type back to its perfcntr group/countable: */
struct fd_batch_query_entry {
   uint8_t gid; /* group-id */
   uint8_t cid; /* countable-id within the group */
};

/* Per-query driver data for a batch query; one entry per query_type
 * requested in fd6_create_batch_query():
 */
struct fd_batch_query_data {
   struct fd_screen *screen;
   unsigned num_query_entries;
   struct fd_batch_query_entry query_entries[];
};
466
static void
perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_batch_query_data *data = aq->query_data;
   struct fd_screen *screen = data->screen;
   struct fd_ringbuffer *ring = batch->draw;

   /* VLA sized by the number of HW counter groups; tracks how many
    * counters each group has handed out so far:
    */
   unsigned counters_per_group[screen->num_perfcntr_groups];
   memset(counters_per_group, 0, sizeof(counters_per_group));

   fd_wfi(batch, ring);

   /* configure performance counters for the requested queries: */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      struct fd_batch_query_entry *entry = &data->query_entries[i];
      const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
      unsigned counter_idx = counters_per_group[entry->gid]++;

      /* create_batch_query() already validated the per-group counts: */
      debug_assert(counter_idx < g->num_counters);

      OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
      OUT_RING(ring, g->countables[entry->cid].selector);
   }

   /* Reset so the second pass assigns the same counter_idx per entry: */
   memset(counters_per_group, 0, sizeof(counters_per_group));

   /* and snapshot the start values */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      struct fd_batch_query_entry *entry = &data->query_entries[i];
      const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
      unsigned counter_idx = counters_per_group[entry->gid]++;
      const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

      OUT_PKT7(ring, CP_REG_TO_MEM, 3);
      OUT_RING(ring, CP_REG_TO_MEM_0_64B |
                        CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
      OUT_RELOC(ring, query_sample_idx(aq, i, start));
   }
}
506
static void
perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_batch_query_data *data = aq->query_data;
   struct fd_screen *screen = data->screen;
   struct fd_ringbuffer *ring = batch->draw;

   /* Same counter-slot assignment scheme as perfcntr_resume(): */
   unsigned counters_per_group[screen->num_perfcntr_groups];
   memset(counters_per_group, 0, sizeof(counters_per_group));

   fd_wfi(batch, ring);

   /* TODO do we need to bother to turn anything off? */

   /* snapshot the end values: */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      struct fd_batch_query_entry *entry = &data->query_entries[i];
      const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
      unsigned counter_idx = counters_per_group[entry->gid]++;
      const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

      OUT_PKT7(ring, CP_REG_TO_MEM, 3);
      OUT_RING(ring, CP_REG_TO_MEM_0_64B |
                        CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
      OUT_RELOC(ring, query_sample_idx(aq, i, stop));
   }

   /* and compute the result: */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      /* result += stop - start: */
      OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
      OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
      OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */
      OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */
      OUT_RELOC(ring, query_sample_idx(aq, i, stop));   /* srcB */
      OUT_RELOC(ring, query_sample_idx(aq, i, start));  /* srcC */
   }
}
545
546 static void
perfcntr_accumulate_result(struct fd_acc_query * aq,void * buf,union pipe_query_result * result)547 perfcntr_accumulate_result(struct fd_acc_query *aq, void *buf,
548 union pipe_query_result *result)
549 {
550 struct fd_batch_query_data *data = aq->query_data;
551 struct fd6_query_sample *sp = buf;
552
553 for (unsigned i = 0; i < data->num_query_entries; i++) {
554 result->batch[i].u64 = sp[i].result;
555 }
556 }
557
/* Provider for gallium batch (perfcntr) queries.  No static .size — the
 * sample buffer size depends on the number of requested queries and is
 * set on the fd_acc_query in fd6_create_batch_query().
 */
static const struct fd_acc_sample_provider perfcntr = {
   .query_type = FD_QUERY_FIRST_PERFCNTR,
   .always = true,
   .resume = perfcntr_resume,
   .pause = perfcntr_pause,
   .result = perfcntr_accumulate_result,
};
565
566 static struct pipe_query *
fd6_create_batch_query(struct pipe_context * pctx,unsigned num_queries,unsigned * query_types)567 fd6_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
568 unsigned *query_types)
569 {
570 struct fd_context *ctx = fd_context(pctx);
571 struct fd_screen *screen = ctx->screen;
572 struct fd_query *q;
573 struct fd_acc_query *aq;
574 struct fd_batch_query_data *data;
575
576 data = CALLOC_VARIANT_LENGTH_STRUCT(
577 fd_batch_query_data, num_queries * sizeof(data->query_entries[0]));
578
579 data->screen = screen;
580 data->num_query_entries = num_queries;
581
582 /* validate the requested query_types and ensure we don't try
583 * to request more query_types of a given group than we have
584 * counters:
585 */
586 unsigned counters_per_group[screen->num_perfcntr_groups];
587 memset(counters_per_group, 0, sizeof(counters_per_group));
588
589 for (unsigned i = 0; i < num_queries; i++) {
590 unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
591
592 /* verify valid query_type, ie. is it actually a perfcntr? */
593 if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
594 (idx >= screen->num_perfcntr_queries)) {
595 mesa_loge("invalid batch query query_type: %u", query_types[i]);
596 goto error;
597 }
598
599 struct fd_batch_query_entry *entry = &data->query_entries[i];
600 struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
601
602 entry->gid = pq->group_id;
603
604 /* the perfcntr_queries[] table flattens all the countables
605 * for each group in series, ie:
606 *
607 * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
608 *
609 * So to find the countable index just step back through the
610 * table to find the first entry with the same group-id.
611 */
612 while (pq > screen->perfcntr_queries) {
613 pq--;
614 if (pq->group_id == entry->gid)
615 entry->cid++;
616 }
617
618 if (counters_per_group[entry->gid] >=
619 screen->perfcntr_groups[entry->gid].num_counters) {
620 mesa_loge("too many counters for group %u", entry->gid);
621 goto error;
622 }
623
624 counters_per_group[entry->gid]++;
625 }
626
627 q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
628 aq = fd_acc_query(q);
629
630 /* sample buffer size is based on # of queries: */
631 aq->size = num_queries * sizeof(struct fd6_query_sample);
632 aq->query_data = data;
633
634 return (struct pipe_query *)q;
635
636 error:
637 free(data);
638 return NULL;
639 }
640
/* Hook up the a6xx query implementation on context creation: register
 * every accumulated-query provider and the batch-query / u_trace entry
 * points.
 */
void
fd6_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->create_query = fd_acc_create_query;
   ctx->query_update_batch = fd_acc_query_update_batch;

   /* u_trace timestamp support: */
   ctx->record_timestamp = record_timestamp;
   ctx->ts_to_ns = ticks_to_ns;

   pctx->create_batch_query = fd6_create_batch_query;

   fd_acc_query_register_provider(pctx, &occlusion_counter);
   fd_acc_query_register_provider(pctx, &occlusion_predicate);
   fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);

   fd_acc_query_register_provider(pctx, &time_elapsed);
   fd_acc_query_register_provider(pctx, &timestamp);

   fd_acc_query_register_provider(pctx, &primitives_generated);
   fd_acc_query_register_provider(pctx, &primitives_emitted);
}
664