/*
 * Copyright 2022 Alyssa Rosenzweig
 * Copyright 2019-2020 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include <stdint.h>
#include "pipe/p_defines.h"
#include "util/bitset.h"
#include "util/macros.h"
#include "util/ralloc.h"
#include "util/u_inlines.h"
#include "util/u_prim.h"
#include "agx_bo.h"
#include "agx_device.h"
#include "agx_state.h"
#include "pool.h"

static bool
is_occlusion(struct agx_query *query)
{
   switch (query->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      return true;
   default:
      return false;
   }
}

static bool
is_timer(struct agx_query *query)
{
   switch (query->type) {
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIME_ELAPSED:
      return true;
   default:
      return false;
   }
}

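/* Occlusion query results are allocated out of a single shared heap, so they
 * can be referenced by a compact 16-bit index (agx_get_oq_index returns a
 * uint16_t), hence the 1 << 16 limit.
 */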
#define AGX_MAX_OCCLUSION_QUERIES (65536)

struct agx_oq_heap {
   /* The GPU allocation itself */
   struct agx_bo *bo;

   /* Bitset of query indices that are free (available to allocate) */
   BITSET_DECLARE(available, AGX_MAX_OCCLUSION_QUERIES);
};

static void
agx_destroy_oq_heap(void *heap_)
{
   struct agx_oq_heap *heap = heap_;
   agx_bo_unreference(heap->bo);
}

static struct agx_oq_heap *
agx_alloc_oq_heap(struct agx_context *ctx)
{
   struct agx_oq_heap *heap = rzalloc(ctx, struct agx_oq_heap);
   ralloc_set_destructor(heap, agx_destroy_oq_heap);

   heap->bo = agx_bo_create(agx_device(ctx->base.screen),
                            AGX_MAX_OCCLUSION_QUERIES * sizeof(uint64_t),
                            AGX_BO_WRITEBACK, "Occlusion query heap");

   /* At the start, everything is available */
   BITSET_ONES(heap->available);

   return heap;
}

static struct agx_oq_heap *
agx_get_oq_heap(struct agx_context *ctx)
{
   if (!ctx->oq)
      ctx->oq = agx_alloc_oq_heap(ctx);

   return ctx->oq;
}

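/* Allocate one 64-bit result slot from the occlusion query heap, returning
 * its CPU/GPU addresses. Returns a NULL pointer if the heap is exhausted.
 */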
static struct agx_ptr
agx_alloc_oq(struct agx_context *ctx)
{
   struct agx_oq_heap *heap = agx_get_oq_heap(ctx);

   /* Find first available */
   int ffs = BITSET_FFS(heap->available);
   if (!ffs)
      return (struct agx_ptr){NULL, 0};

   /* Allocate it */
   unsigned index = ffs - 1;
   BITSET_CLEAR(heap->available, index);

   unsigned offset = index * sizeof(uint64_t);

   return (struct agx_ptr){
      (uint8_t *)heap->bo->ptr.cpu + offset,
      heap->bo->ptr.gpu + offset,
   };
}

static unsigned
agx_oq_index(struct agx_context *ctx, struct agx_query *q)
{
   assert(is_occlusion(q));

   return (q->ptr.gpu - ctx->oq->bo->ptr.gpu) / sizeof(uint64_t);
}

static void
agx_free_oq(struct agx_context *ctx, struct agx_query *q)
{
   struct agx_oq_heap *heap = agx_get_oq_heap(ctx);
   unsigned index = agx_oq_index(ctx, q);

   assert(index < AGX_MAX_OCCLUSION_QUERIES);
   assert(!BITSET_TEST(heap->available, index));

   BITSET_SET(heap->available, index);
}

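/* Return the GPU address of the occlusion query heap if this batch references
 * it (i.e. writes occlusion queries), else 0.
 */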
uint64_t
agx_get_occlusion_heap(struct agx_batch *batch)
{
   if (!batch->ctx->oq)
      return 0;

   struct agx_bo *bo = batch->ctx->oq->bo;

   if (agx_batch_uses_bo(batch, bo))
      return bo->ptr.gpu;
   else
      return 0;
}

static struct pipe_query *
agx_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
{
   struct agx_query *query = calloc(1, sizeof(struct agx_query));

   query->type = query_type;
   query->index = index;

   /* Set all writer generations to a sentinel that will always compare as
    * false, since a freshly created query has no writers yet.
    */
   for (unsigned i = 0; i < ARRAY_SIZE(query->writer_generation); ++i) {
      query->writer_generation[i] = UINT64_MAX;
   }

   if (is_occlusion(query)) {
      query->ptr = agx_alloc_oq(agx_context(ctx));
   } else {
      /* TODO: a BO for the query is wasteful, but we benefit from BO list
       * tracking / reference counting to deal with lifetimes.
       */
      query->bo = agx_bo_create(agx_device(ctx->screen), sizeof(uint64_t) * 2,
                                AGX_BO_WRITEBACK, "Query");
      query->ptr = query->bo->ptr;
   }

   if (!query->ptr.gpu) {
      free(query);
      return NULL;
   }

   return (struct pipe_query *)query;
}

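/* Wait on every batch that may still write this query. A batch is considered
 * a writer iff the generation recorded in the query matches the context's
 * current generation for that batch slot.
 */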
static void
sync_query_writers(struct agx_context *ctx, struct agx_query *query,
                   const char *reason)
{
   STATIC_ASSERT(ARRAY_SIZE(ctx->batches.generation) == AGX_MAX_BATCHES);
   STATIC_ASSERT(ARRAY_SIZE(ctx->batches.slots) == AGX_MAX_BATCHES);
   STATIC_ASSERT(ARRAY_SIZE(query->writer_generation) == AGX_MAX_BATCHES);

   for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) {
      if (query->writer_generation[i] == ctx->batches.generation[i])
         agx_sync_batch_for_reason(ctx, &ctx->batches.slots[i], reason);
   }
}

static bool
is_query_busy(struct agx_context *ctx, struct agx_query *query)
{
   for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) {
      if (query->writer_generation[i] == ctx->batches.generation[i])
         return true;
   }

   return false;
}

static void
agx_destroy_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
   struct agx_context *ctx = agx_context(pctx);
   struct agx_query *query = (struct agx_query *)pquery;

   /* We don't reference count the occlusion query allocations, so we need to
    * sync writers before destroying: once a slot is marked available, the
    * driver assumes it is idle and may write it from the CPU.
    *
    * For other queries, the BO itself is reference counted, so it can outlive
    * the pipe_query and we don't need to flush.
    */
   if (is_occlusion(query)) {
      sync_query_writers(ctx, query, "Occlusion query destroy");
      agx_free_oq(ctx, query);
   } else {
      agx_bo_unreference(query->bo);
   }

   free(pquery);
}

static bool
agx_begin_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
   struct agx_context *ctx = agx_context(pctx);
   struct agx_query *query = (struct agx_query *)pquery;

   ctx->dirty |= AGX_DIRTY_QUERY;

   switch (query->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      ctx->occlusion_query = query;
      break;

   case PIPE_QUERY_PRIMITIVES_GENERATED:
      ctx->prims_generated[query->index] = query;
      break;

   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ctx->tf_prims_generated[query->index] = query;
      break;

   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      ctx->tf_overflow[query->index] = query;
      break;

   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      ctx->tf_any_overflow = query;
      break;

   case PIPE_QUERY_TIME_ELAPSED:
      ctx->time_elapsed = query;
      break;

   case PIPE_QUERY_TIMESTAMP:
      /* No-op */
      break;

   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      assert(query->index < ARRAY_SIZE(ctx->pipeline_statistics));
      ctx->pipeline_statistics[query->index] = query;
      break;

   default:
      return false;
   }

   /* begin_query zeroes, sync so we can do that write from the CPU */
   sync_query_writers(ctx, query, "Query overwritten");

   uint64_t *ptr = query->ptr.cpu;
   ptr[0] = 0;

   if (query->type == PIPE_QUERY_TIME_ELAPSED) {
      /* The begin timestamp is MIN-accumulated into the second slot and the
       * end timestamp MAX-accumulated into the first (see
       * agx_finish_batch_queries), so seed the begin slot with UINT64_MAX.
       */
      ptr[1] = UINT64_MAX;
   }

   return true;
}

static bool
agx_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
   struct agx_context *ctx = agx_context(pctx);
   struct agx_device *dev = agx_device(pctx->screen);
   struct agx_query *query = (struct agx_query *)pquery;

   ctx->dirty |= AGX_DIRTY_QUERY;

   switch (query->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      ctx->occlusion_query = NULL;
      return true;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      ctx->prims_generated[query->index] = NULL;
      return true;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ctx->tf_prims_generated[query->index] = NULL;
      return true;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      ctx->tf_overflow[query->index] = NULL;
      return true;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      ctx->tf_any_overflow = NULL;
      return true;
   case PIPE_QUERY_TIME_ELAPSED:
      ctx->time_elapsed = NULL;
      return true;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      assert(query->index < ARRAY_SIZE(ctx->pipeline_statistics));
      ctx->pipeline_statistics[query->index] = NULL;
      return true;
   case PIPE_QUERY_TIMESTAMP: {
      /* The timestamp is logically written now. Batches that are still in
       * flight will MAX in their finish times; if there are none, the result
       * is just the current timestamp.
       */
      agx_add_timestamp_end_query(ctx, query);

      uint64_t *value = query->ptr.cpu;
      *value = agx_get_gpu_timestamp(dev);

      return true;
   }
   default:
      return false;
   }
}

static bool
agx_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery,
                     bool wait, union pipe_query_result *vresult)
{
   struct agx_query *query = (struct agx_query *)pquery;
   struct agx_context *ctx = agx_context(pctx);
   struct agx_device *dev = agx_device(pctx->screen);

   /* TODO: Honour `wait` */
   sync_query_writers(ctx, query, "Reading query results");

   uint64_t *ptr = query->ptr.cpu;
   uint64_t value = *ptr;

   switch (query->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      vresult->b = value;
      return true;

   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      vresult->b = value > 0;
      return true;

   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      vresult->u64 = value;
      return true;

   case PIPE_QUERY_TIMESTAMP:
      vresult->u64 = agx_gpu_time_to_ns(dev, value);
      return true;

   case PIPE_QUERY_TIME_ELAPSED:
      /* end - begin */
      vresult->u64 = agx_gpu_time_to_ns(dev, ptr[0] - ptr[1]);
      return true;

   default:
      unreachable("Other queries not yet supported");
   }
}

static void
agx_get_query_result_resource(struct pipe_context *pipe, struct pipe_query *q,
                              enum pipe_query_flags flags,
                              enum pipe_query_value_type result_type, int index,
                              struct pipe_resource *resource, unsigned offset)
{
   struct agx_query *query = (struct agx_query *)q;

   /* TODO: Don't cheat XXX */
   struct agx_context *ctx = agx_context(pipe);

   union pipe_query_result result;
   if (index < 0) {
      /* availability */
      result.u64 = !is_query_busy(ctx, query);
   } else {
      bool ready = agx_get_query_result(pipe, q, true, &result);
      assert(ready);

      switch (query->type) {
      case PIPE_QUERY_OCCLUSION_PREDICATE:
      case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
         result.u64 = result.b;
         break;
      default:
         break;
      }
   }

   /* Clamp to the requested result type, as tested by arb_query_buffer_object-qbo */
   if (result_type == PIPE_QUERY_TYPE_U32) {
      result.u32 = MIN2(result.u64, u_uintN_max(32));
   } else if (result_type == PIPE_QUERY_TYPE_I32) {
      int64_t x = result.u64;
      x = MAX2(MIN2(x, u_intN_max(32)), u_intN_min(32));
      result.u32 = x;
   }

   if (result_type <= PIPE_QUERY_TYPE_U32)
      pipe_buffer_write(pipe, resource, offset, 4, &result.u32);
   else
      pipe_buffer_write(pipe, resource, offset, 8, &result.u64);
}

static void
agx_set_active_query_state(struct pipe_context *pipe, bool enable)
{
   struct agx_context *ctx = agx_context(pipe);

   ctx->active_queries = enable;
   ctx->dirty |= AGX_DIRTY_QUERY;
}

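/* Register a query as written by a batch: add the backing BO to the batch's
 * BO list and record the batch's current generation so sync_query_writers()
 * can later find and flush the writer.
 */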
static void
agx_add_query_to_batch(struct agx_batch *batch, struct agx_query *query)
{
   unsigned idx = agx_batch_idx(batch);
   struct agx_bo *bo = is_occlusion(query) ? batch->ctx->oq->bo : query->bo;

   agx_batch_add_bo(batch, bo);
   query->writer_generation[idx] = batch->ctx->batches.generation[idx];
}

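/* Track a timestamp query for a batch. The result pointer is recorded so the
 * batch can fold its begin/end times in when it finishes (see
 * agx_finish_batch_queries).
 */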
void
agx_batch_add_timestamp_query(struct agx_batch *batch, struct agx_query *q)
{
   if (q) {
      agx_add_query_to_batch(batch, q);
      util_dynarray_append(&batch->timestamps, struct agx_ptr, q->ptr);
   }
}

uint16_t
agx_get_oq_index(struct agx_batch *batch, struct agx_query *query)
{
   agx_add_query_to_batch(batch, query);
   return agx_oq_index(batch->ctx, query);
}

uint64_t
agx_get_query_address(struct agx_batch *batch, struct agx_query *query)
{
   agx_add_query_to_batch(batch, query);
   return query->ptr.gpu;
}

void
agx_finish_batch_queries(struct agx_batch *batch, uint64_t begin_ts,
                         uint64_t end_ts)
{
   /* Remove the batch as a writer from all queries by incrementing the
    * batch's generation.
    */
   batch->ctx->batches.generation[agx_batch_idx(batch)]++;

   /* Write out timestamps */
   util_dynarray_foreach(&batch->timestamps, struct agx_ptr, it) {
      uint64_t *ptr = it->cpu;

      ptr[0] = MAX2(ptr[0], end_ts);
      ptr[1] = MIN2(ptr[1], begin_ts);
   }
}

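/* Increment a query result from the CPU. Any GPU writers must be synced first
 * so the read-modify-write below doesn't race with in-flight batches.
 */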
void
agx_query_increment_cpu(struct agx_context *ctx, struct agx_query *query,
                        uint64_t increment)
{
   if (!query)
      return;

   sync_query_writers(ctx, query, "CPU query increment");

   uint64_t *value = query->ptr.cpu;
   *value += increment;
}

static void
agx_render_condition(struct pipe_context *pipe, struct pipe_query *query,
                     bool condition, enum pipe_render_cond_flag mode)
{
   struct agx_context *ctx = agx_context(pipe);

   ctx->cond_query = query;
   ctx->cond_cond = condition;
   ctx->cond_mode = mode;
}

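/* Evaluate conditional rendering on the CPU: read back the bound query and
 * decide whether draws should proceed. If the result isn't available,
 * rendering proceeds.
 */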
bool
agx_render_condition_check_inner(struct agx_context *ctx)
{
   assert(ctx->cond_query != NULL && "precondition");

   perf_debug_ctx(ctx, "Implementing conditional rendering on the CPU");

   union pipe_query_result res = {0};
   bool wait = ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
               ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

   struct pipe_query *pq = (struct pipe_query *)ctx->cond_query;

   if (agx_get_query_result(&ctx->base, pq, wait, &res))
      return res.u64 != ctx->cond_cond;

   return true;
}

void
agx_init_query_functions(struct pipe_context *pctx)
{
   pctx->create_query = agx_create_query;
   pctx->destroy_query = agx_destroy_query;
   pctx->begin_query = agx_begin_query;
   pctx->end_query = agx_end_query;
   pctx->get_query_result = agx_get_query_result;
   pctx->get_query_result_resource = agx_get_query_result_resource;
   pctx->set_active_query_state = agx_set_active_query_state;
   pctx->render_condition = agx_render_condition;

   /* By default queries are active */
   agx_context(pctx)->active_queries = true;
}