/*
 * Copyright 2022 Alyssa Rosenzweig
 * Copyright 2019-2020 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include <stdint.h>
#include "pipe/p_defines.h"
#include "util/bitset.h"
#include "util/macros.h"
#include "util/ralloc.h"
#include "util/u_inlines.h"
#include "util/u_prim.h"
#include "agx_bo.h"
#include "agx_device.h"
#include "agx_state.h"
#include "pool.h"

static bool
is_occlusion(struct agx_query *query)
{
   switch (query->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      return true;
   default:
      return false;
   }
}

static bool
is_timer(struct agx_query *query)
{
   switch (query->type) {
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIME_ELAPSED:
      return true;
   default:
      return false;
   }
}

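/*
 * All occlusion queries in a context are allocated out of a single GPU heap
 * of 64-bit counters. A query is identified by its 16-bit slot index within
 * the heap (see agx_get_oq_index), which bounds the number of simultaneous
 * occlusion queries.
 */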
#define AGX_MAX_OCCLUSION_QUERIES (65536)

struct agx_oq_heap {
   /* The GPU allocation itself */
   struct agx_bo *bo;

   /* Bitset of query indices that are available (free bits are set) */
   BITSET_DECLARE(available, AGX_MAX_OCCLUSION_QUERIES);
};

static void
agx_destroy_oq_heap(void *heap_)
{
   struct agx_oq_heap *heap = heap_;
   agx_bo_unreference(heap->bo);
}

static struct agx_oq_heap *
agx_alloc_oq_heap(struct agx_context *ctx)
{
   struct agx_oq_heap *heap = rzalloc(ctx, struct agx_oq_heap);
   ralloc_set_destructor(heap, agx_destroy_oq_heap);

   heap->bo = agx_bo_create(agx_device(ctx->base.screen),
                            AGX_MAX_OCCLUSION_QUERIES * sizeof(uint64_t),
                            AGX_BO_WRITEBACK, "Occlusion query heap");

   /* At the start, everything is available */
   BITSET_ONES(heap->available);

   return heap;
}

static struct agx_oq_heap *
agx_get_oq_heap(struct agx_context *ctx)
{
   if (!ctx->oq)
      ctx->oq = agx_alloc_oq_heap(ctx);

   return ctx->oq;
}

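/*
 * Allocate a slot from the occlusion query heap, returning the CPU/GPU
 * addresses of its counter, or a NULL pointer if the heap is exhausted.
 */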
static struct agx_ptr
agx_alloc_oq(struct agx_context *ctx)
{
   struct agx_oq_heap *heap = agx_get_oq_heap(ctx);

   /* Find first available */
   int ffs = BITSET_FFS(heap->available);
   if (!ffs)
      return (struct agx_ptr){NULL, 0};

   /* Allocate it */
   unsigned index = ffs - 1;
   BITSET_CLEAR(heap->available, index);

   unsigned offset = index * sizeof(uint64_t);

   return (struct agx_ptr){
      (uint8_t *)heap->bo->ptr.cpu + offset,
      heap->bo->ptr.gpu + offset,
   };
}

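/* Recover an occlusion query's slot index from its GPU address in the heap */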
static unsigned
agx_oq_index(struct agx_context *ctx, struct agx_query *q)
{
   assert(is_occlusion(q));

   return (q->ptr.gpu - ctx->oq->bo->ptr.gpu) / sizeof(uint64_t);
}

static void
agx_free_oq(struct agx_context *ctx, struct agx_query *q)
{
   struct agx_oq_heap *heap = agx_get_oq_heap(ctx);
   unsigned index = agx_oq_index(ctx, q);

   assert(index < AGX_MAX_OCCLUSION_QUERIES);
   assert(!BITSET_TEST(heap->available, index));

   BITSET_SET(heap->available, index);
}

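/*
 * Get the GPU address of the occlusion query heap, or 0 if the batch does
 * not reference the heap (i.e. it writes no occlusion queries).
 */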
uint64_t
agx_get_occlusion_heap(struct agx_batch *batch)
{
   if (!batch->ctx->oq)
      return 0;

   struct agx_bo *bo = batch->ctx->oq->bo;

   if (agx_batch_uses_bo(batch, bo))
      return bo->ptr.gpu;
   else
      return 0;
}

static struct pipe_query *
agx_create_query(struct pipe_context *ctx, unsigned query_type, unsigned index)
{
   struct agx_query *query = calloc(1, sizeof(struct agx_query));

   query->type = query_type;
   query->index = index;

   /* Set all writer generations to a sentinel that will never match a live
    * batch, since a freshly created query has no writers.
    */
   for (unsigned i = 0; i < ARRAY_SIZE(query->writer_generation); ++i) {
      query->writer_generation[i] = UINT64_MAX;
   }

   if (is_occlusion(query)) {
      query->ptr = agx_alloc_oq(agx_context(ctx));
   } else {
      /* TODO: a BO for the query is wasteful, but we benefit from BO list
       * tracking / reference counting to deal with lifetimes.
       */
      query->bo = agx_bo_create(agx_device(ctx->screen), sizeof(uint64_t) * 2,
                                AGX_BO_WRITEBACK, "Query");
      query->ptr = query->bo->ptr;
   }

   if (!query->ptr.gpu) {
      free(query);
      return NULL;
   }

   return (struct pipe_query *)query;
}

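/*
 * Sync every batch that may still write this query. A batch slot is a live
 * writer when the generation recorded in the query matches the slot's
 * current generation; finishing a batch bumps the generation, retiring stale
 * entries (see agx_finish_batch_queries).
 */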
static void
sync_query_writers(struct agx_context *ctx, struct agx_query *query,
                   const char *reason)
{
   STATIC_ASSERT(ARRAY_SIZE(ctx->batches.generation) == AGX_MAX_BATCHES);
   STATIC_ASSERT(ARRAY_SIZE(ctx->batches.slots) == AGX_MAX_BATCHES);
   STATIC_ASSERT(ARRAY_SIZE(query->writer_generation) == AGX_MAX_BATCHES);

   for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) {
      if (query->writer_generation[i] == ctx->batches.generation[i])
         agx_sync_batch_for_reason(ctx, &ctx->batches.slots[i], reason);
   }
}

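/* Check whether any in-flight batch still writes this query */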
static bool
is_query_busy(struct agx_context *ctx, struct agx_query *query)
{
   for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) {
      if (query->writer_generation[i] == ctx->batches.generation[i])
         return true;
   }

   return false;
}

static void
agx_destroy_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
   struct agx_context *ctx = agx_context(pctx);
   struct agx_query *query = (struct agx_query *)pquery;

   /* Occlusion query allocations are not reference counted, and the driver
    * assumes an available slot is idle, so sync any writers before returning
    * the slot to the heap. That way the slot can be freely rewritten from
    * the CPU after this point.
    *
    * Other queries have their own reference counted BO, which keeps the
    * memory alive past the pipe_query, so no flush is needed.
    */
   if (is_occlusion(query)) {
      sync_query_writers(ctx, query, "Occlusion query destroy");
      agx_free_oq(ctx, query);
   } else {
      agx_bo_unreference(query->bo);
   }

   free(pquery);
}

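/*
 * Begin a query: bind it to the context so subsequent draws accumulate into
 * it, then zero the result from the CPU.
 */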
static bool
agx_begin_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
   struct agx_context *ctx = agx_context(pctx);
   struct agx_query *query = (struct agx_query *)pquery;

   ctx->dirty |= AGX_DIRTY_QUERY;

   switch (query->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      ctx->occlusion_query = query;
      break;

   case PIPE_QUERY_PRIMITIVES_GENERATED:
      ctx->prims_generated[query->index] = query;
      break;

   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ctx->tf_prims_generated[query->index] = query;
      break;

   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      ctx->tf_overflow[query->index] = query;
      break;

   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      ctx->tf_any_overflow = query;
      break;

   case PIPE_QUERY_TIME_ELAPSED:
      ctx->time_elapsed = query;
      break;

   case PIPE_QUERY_TIMESTAMP:
      /* No-op */
      break;

   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      assert(query->index < ARRAY_SIZE(ctx->pipeline_statistics));
      ctx->pipeline_statistics[query->index] = query;
      break;

   default:
      return false;
   }

   /* begin_query zeroes the result, so sync writers first to make this CPU
    * write safe.
    */
   sync_query_writers(ctx, query, "Query overwritten");

   uint64_t *ptr = query->ptr.cpu;
   ptr[0] = 0;

   if (query->type == PIPE_QUERY_TIME_ELAPSED) {
      /* The begin timestamp lives in the second record and the end timestamp
       * in the first, so initialize begin to the maximum (it is accumulated
       * with MIN).
       */
      ptr[1] = UINT64_MAX;
   }

   return true;
}

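/*
 * End a query: unbind it from the context. The result may still be pending
 * until all writer batches finish.
 */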
static bool
agx_end_query(struct pipe_context *pctx, struct pipe_query *pquery)
{
   struct agx_context *ctx = agx_context(pctx);
   struct agx_device *dev = agx_device(pctx->screen);
   struct agx_query *query = (struct agx_query *)pquery;

   ctx->dirty |= AGX_DIRTY_QUERY;

   switch (query->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      ctx->occlusion_query = NULL;
      return true;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      ctx->prims_generated[query->index] = NULL;
      return true;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      ctx->tf_prims_generated[query->index] = NULL;
      return true;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      ctx->tf_overflow[query->index] = NULL;
      return true;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      ctx->tf_any_overflow = NULL;
      return true;
   case PIPE_QUERY_TIME_ELAPSED:
      ctx->time_elapsed = NULL;
      return true;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      assert(query->index < ARRAY_SIZE(ctx->pipeline_statistics));
      ctx->pipeline_statistics[query->index] = NULL;
      return true;
   case PIPE_QUERY_TIMESTAMP: {
      /* The timestamp is logically written now. Set up in-flight batches to
       * MAX in their finish times; if there are no batches, the result is
       * just the current timestamp.
       */
      agx_add_timestamp_end_query(ctx, query);

      uint64_t *value = query->ptr.cpu;
      *value = agx_get_gpu_timestamp(dev);

      return true;
   }
   default:
      return false;
   }
}

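/*
 * Read back a query result on the CPU, syncing any batches that still write
 * the query so the value is stable before we read it.
 */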
static bool
agx_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery,
                     bool wait, union pipe_query_result *vresult)
{
   struct agx_query *query = (struct agx_query *)pquery;
   struct agx_context *ctx = agx_context(pctx);
   struct agx_device *dev = agx_device(pctx->screen);

   /* TODO: Honour `wait` */
   sync_query_writers(ctx, query, "Reading query results");

   uint64_t *ptr = query->ptr.cpu;
   uint64_t value = *ptr;

   switch (query->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      vresult->b = value;
      return true;

   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      vresult->b = value > 0;
      return true;

   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      vresult->u64 = value;
      return true;

   case PIPE_QUERY_TIMESTAMP:
      vresult->u64 = agx_gpu_time_to_ns(dev, value);
      return true;

   case PIPE_QUERY_TIME_ELAPSED:
      /* end - begin */
      vresult->u64 = agx_gpu_time_to_ns(dev, ptr[0] - ptr[1]);
      return true;

   default:
      unreachable("Other queries not yet supported");
   }
}

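/*
 * Write a query result (or, for index < 0, its availability) into a buffer
 * object. Currently implemented by resolving the result on the CPU and then
 * writing it into the buffer, per the TODO below.
 */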
static void
agx_get_query_result_resource(struct pipe_context *pipe, struct pipe_query *q,
                              enum pipe_query_flags flags,
                              enum pipe_query_value_type result_type, int index,
                              struct pipe_resource *resource, unsigned offset)
{
   struct agx_query *query = (struct agx_query *)q;

   /* TODO: Don't cheat XXX */
   struct agx_context *ctx = agx_context(pipe);

   union pipe_query_result result;
   if (index < 0) {
      /* Availability: true when no in-flight batch still writes the query */
      result.u64 = !is_query_busy(ctx, query);
   } else {
      bool ready = agx_get_query_result(pipe, q, true, &result);
      assert(ready);

      switch (query->type) {
      case PIPE_QUERY_OCCLUSION_PREDICATE:
      case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
         result.u64 = result.b;
         break;
      default:
         break;
      }
   }

   /* Clamp to the result type, as required by the
    * arb_query_buffer_object-qbo tests.
    */
   if (result_type == PIPE_QUERY_TYPE_U32) {
      result.u32 = MIN2(result.u64, u_uintN_max(32));
   } else if (result_type == PIPE_QUERY_TYPE_I32) {
      int64_t x = result.u64;
      x = MAX2(MIN2(x, u_intN_max(32)), u_intN_min(32));
      result.u32 = x;
   }

   if (result_type <= PIPE_QUERY_TYPE_U32)
      pipe_buffer_write(pipe, resource, offset, 4, &result.u32);
   else
      pipe_buffer_write(pipe, resource, offset, 8, &result.u64);
}

static void
agx_set_active_query_state(struct pipe_context *pipe, bool enable)
{
   struct agx_context *ctx = agx_context(pipe);

   ctx->active_queries = enable;
   ctx->dirty |= AGX_DIRTY_QUERY;
}

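/*
 * Record a batch as a writer of a query: add the backing BO to the batch and
 * stamp the query with the batch slot's current generation.
 */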
static void
agx_add_query_to_batch(struct agx_batch *batch, struct agx_query *query)
{
   unsigned idx = agx_batch_idx(batch);
   struct agx_bo *bo = is_occlusion(query) ? batch->ctx->oq->bo : query->bo;

   agx_batch_add_bo(batch, bo);
   query->writer_generation[idx] = batch->ctx->batches.generation[idx];
}

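/*
 * Track a timestamp query in a batch, so the batch's begin/end times get
 * folded into the query when the batch finishes.
 */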
void
agx_batch_add_timestamp_query(struct agx_batch *batch, struct agx_query *q)
{
   if (q) {
      agx_add_query_to_batch(batch, q);
      util_dynarray_append(&batch->timestamps, struct agx_ptr, q->ptr);
   }
}

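/* Get an occlusion query's heap index, recording the batch as a writer */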
uint16_t
agx_get_oq_index(struct agx_batch *batch, struct agx_query *query)
{
   agx_add_query_to_batch(batch, query);
   return agx_oq_index(batch->ctx, query);
}

uint64_t
agx_get_query_address(struct agx_batch *batch, struct agx_query *query)
{
   agx_add_query_to_batch(batch, query);
   return query->ptr.gpu;
}

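/*
 * Called with a batch's begin/end GPU timestamps when it finishes: retire
 * the batch as a query writer and accumulate its timestamps into every
 * timestamp query it recorded.
 */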
void
agx_finish_batch_queries(struct agx_batch *batch, uint64_t begin_ts,
                         uint64_t end_ts)
{
   /* Remove the batch as a writer from all queries by incrementing the
    * batch's generation.
    */
   batch->ctx->batches.generation[agx_batch_idx(batch)]++;

   /* Write out timestamps */
   util_dynarray_foreach(&batch->timestamps, struct agx_ptr, it) {
      uint64_t *ptr = it->cpu;

      ptr[0] = MAX2(ptr[0], end_ts);
      ptr[1] = MIN2(ptr[1], begin_ts);
   }
}

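/*
 * Accumulate into a query directly from the CPU, syncing GPU writers first
 * so the read-modify-write below is safe. Null queries are ignored.
 */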
void
agx_query_increment_cpu(struct agx_context *ctx, struct agx_query *query,
                        uint64_t increment)
{
   if (!query)
      return;

   sync_query_writers(ctx, query, "CPU query increment");

   uint64_t *value = query->ptr.cpu;
   *value += increment;
}

static void
agx_render_condition(struct pipe_context *pipe, struct pipe_query *query,
                     bool condition, enum pipe_render_cond_flag mode)
{
   struct agx_context *ctx = agx_context(pipe);

   ctx->cond_query = query;
   ctx->cond_cond = condition;
   ctx->cond_mode = mode;
}

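/*
 * Evaluate conditional rendering on the CPU. Returns true if rendering
 * should proceed, which is also the conservative answer when the result is
 * unavailable.
 */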
bool
agx_render_condition_check_inner(struct agx_context *ctx)
{
   assert(ctx->cond_query != NULL && "precondition");

   perf_debug_ctx(ctx, "Implementing conditional rendering on the CPU");

   union pipe_query_result res = {0};
   bool wait = ctx->cond_mode != PIPE_RENDER_COND_NO_WAIT &&
               ctx->cond_mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;

   struct pipe_query *pq = (struct pipe_query *)ctx->cond_query;

   if (agx_get_query_result(&ctx->base, pq, wait, &res))
      return res.u64 != ctx->cond_cond;

   return true;
}

void
agx_init_query_functions(struct pipe_context *pctx)
{
   pctx->create_query = agx_create_query;
   pctx->destroy_query = agx_destroy_query;
   pctx->begin_query = agx_begin_query;
   pctx->end_query = agx_end_query;
   pctx->get_query_result = agx_get_query_result;
   pctx->get_query_result_resource = agx_get_query_result_resource;
   pctx->set_active_query_state = agx_set_active_query_state;
   pctx->render_condition = agx_render_condition;

   /* By default queries are active */
   agx_context(pctx)->active_queries = true;
}