• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "zink_query.h"
2 
3 #include "zink_context.h"
4 #include "zink_clear.h"
5 #include "zink_program.h"
6 #include "zink_resource.h"
7 #include "zink_screen.h"
8 
9 #include "util/u_dump.h"
10 #include "util/u_inlines.h"
11 #include "util/u_memory.h"
12 
13 #define NUM_QUERIES 500
14 
15 #define ZINK_QUERY_RENDER_PASSES (PIPE_QUERY_DRIVER_SPECIFIC + 0)
16 
/* a VkQueryPool of NUM_QUERIES queries sharing one VkQueryType (and, for
 * pipeline-statistics pools, one statistics flag set); pools live on
 * ctx->query_pools and hand out query ids sequentially via last_range
 */
struct zink_query_pool {
   struct list_head list;
   VkQueryType vk_query_type;
   VkQueryPipelineStatisticFlags pipeline_stats;
   VkQueryPool query_pool;
   unsigned last_range; /* next unallocated query id in this pool */
   unsigned refcount;
};
25 
/* one set of staging buffers (one per backing vk query) that query
 * results are copied into; queries keep a list of these
 */
struct zink_query_buffer {
   struct list_head list;
   unsigned num_results; /* number of result groups written to these buffers */
   struct pipe_resource *buffers[PIPE_MAX_VERTEX_STREAMS];
};
31 
/* a single refcounted vulkan query allocated out of a zink_query_pool */
struct zink_vk_query {
   struct zink_query_pool *pool;
   unsigned query_id; /* index of this query within pool->query_pool */
   bool needs_reset; /* vkCmdResetQueryPool must run before (re)use */
   bool started; /* a begin has been recorded and not yet ended */
   uint32_t refcount;
};
39 
/* one "start" of a gallium query: the set of vk queries backing it for
 * one begin/end span, plus per-span draw-state flags
 */
struct zink_query_start {
   union {
      struct {
         bool have_gs;
         bool have_xfb;
         bool was_line_loop;
      };
      /* aliases the flags above so they can be cleared in a single store */
      uint32_t data;
   };
   struct zink_vk_query *vkq[PIPE_MAX_VERTEX_STREAMS];
};
51 
/* gallium-level query state; a single zink_query may be backed by many
 * vulkan queries over its lifetime (one zink_query_start per begin)
 */
struct zink_query {
   struct threaded_query base;
   enum pipe_query_type type;

   /* Everytime the gallium query needs
    * another vulkan query, add a new start.
    */
   struct util_dynarray starts;
   unsigned start_offset;

   VkQueryType vkqtype;
   unsigned index; /* vertex stream index / statistics counter index */
   bool precise; /* occlusion counter requires exact sample counts */

   bool active; /* query is considered active by vk */
   bool needs_reset; /* query is considered active by vk and cannot be destroyed */
   bool dead; /* query should be destroyed when its fence finishes */
   bool needs_update; /* query needs to update its qbos */
   bool needs_rast_discard_workaround; /* query needs discard disabled */
   bool suspended;
   bool started_in_rp; //needs to be stopped in rp

   struct list_head active_list;

   struct list_head stats_list; /* when active, statistics queries are added to ctx->primitives_generated_queries */
   bool has_draws; /* have_gs and have_xfb are valid for idx=curr_query */

   struct zink_batch_usage *batch_uses; //batch that the query was started in

   struct list_head buffers; /* list of zink_query_buffer */
   union {
      struct zink_query_buffer *curr_qbo;
      struct pipe_fence_handle *fence; //PIPE_QUERY_GPU_FINISHED
   };

   struct zink_resource *predicate; /* buffer used for conditional rendering */
   bool predicate_dirty;
};
90 
/* driver-specific queries exposed to gallium (see ZINK_QUERY_RENDER_PASSES) */
static const struct pipe_driver_query_info zink_specific_queries[] = {
   {"render-passes", ZINK_QUERY_RENDER_PASSES, { 0 }},
};
94 
95 static inline int
get_num_starts(struct zink_query * q)96 get_num_starts(struct zink_query *q)
97 {
98    return util_dynarray_num_elements(&q->starts, struct zink_query_start);
99 }
100 
101 static void
102 update_query_id(struct zink_context *ctx, struct zink_query *q);
103 
104 
105 static VkQueryPipelineStatisticFlags
pipeline_statistic_convert(enum pipe_statistics_query_index idx)106 pipeline_statistic_convert(enum pipe_statistics_query_index idx)
107 {
108    unsigned map[] = {
109       [PIPE_STAT_QUERY_IA_VERTICES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT,
110       [PIPE_STAT_QUERY_IA_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT,
111       [PIPE_STAT_QUERY_VS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT,
112       [PIPE_STAT_QUERY_GS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT,
113       [PIPE_STAT_QUERY_GS_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT,
114       [PIPE_STAT_QUERY_C_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT,
115       [PIPE_STAT_QUERY_C_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT,
116       [PIPE_STAT_QUERY_PS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT,
117       [PIPE_STAT_QUERY_HS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT,
118       [PIPE_STAT_QUERY_DS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT,
119       [PIPE_STAT_QUERY_CS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT
120    };
121    assert(idx < ARRAY_SIZE(map));
122    return map[idx];
123 }
124 
125 static void
begin_vk_query_indexed(struct zink_context * ctx,struct zink_vk_query * vkq,int index,VkQueryControlFlags flags)126 begin_vk_query_indexed(struct zink_context *ctx, struct zink_vk_query *vkq, int index,
127                        VkQueryControlFlags flags)
128 {
129    struct zink_batch *batch = &ctx->batch;
130    if (!vkq->started) {
131       VKCTX(CmdBeginQueryIndexedEXT)(batch->state->cmdbuf,
132                                      vkq->pool->query_pool,
133                                      vkq->query_id,
134                                      flags,
135                                      index);
136       vkq->started = true;
137    }
138 }
139 
140 static void
end_vk_query_indexed(struct zink_context * ctx,struct zink_vk_query * vkq,int index)141 end_vk_query_indexed(struct zink_context *ctx, struct zink_vk_query *vkq, int index)
142 {
143    struct zink_batch *batch = &ctx->batch;
144    if (vkq->started) {
145       VKCTX(CmdEndQueryIndexedEXT)(batch->state->cmdbuf,
146                                    vkq->pool->query_pool,
147                                    vkq->query_id, index);
148       vkq->started = false;
149    }
150 }
151 
152 static void
reset_vk_query_pool(struct zink_context * ctx,struct zink_vk_query * vkq)153 reset_vk_query_pool(struct zink_context *ctx, struct zink_vk_query *vkq)
154 {
155    struct zink_batch *batch = &ctx->batch;
156    if (vkq->needs_reset) {
157       VKCTX(CmdResetQueryPool)(batch->state->reordered_cmdbuf, vkq->pool->query_pool, vkq->query_id, 1);
158       batch->state->has_barriers = true;
159    }
160    vkq->needs_reset = false;
161 }
162 
163 void
zink_context_destroy_query_pools(struct zink_context * ctx)164 zink_context_destroy_query_pools(struct zink_context *ctx)
165 {
166    struct zink_screen *screen = zink_screen(ctx->base.screen);
167    list_for_each_entry_safe(struct zink_query_pool, pool, &ctx->query_pools, list) {
168       VKSCR(DestroyQueryPool)(screen->dev, pool->query_pool, NULL);
169       list_del(&pool->list);
170       FREE(pool);
171    }
172 }
173 
/* find a compatible existing query pool on the context or create a new one.
 * compatibility = same VkQueryType, and for pipeline-statistics pools also
 * the same statistics flags. idx==1 selects the xfb side of an emulated
 * primitives-generated query. returns NULL on allocation failure.
 */
static struct zink_query_pool *
find_or_allocate_qp(struct zink_context *ctx, struct zink_query *q, unsigned idx)
{
   VkQueryPipelineStatisticFlags pipeline_stats = 0;
   /* emulated primgen counts gs primitives + clipping invocations */
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      pipeline_stats = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
                       VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT;
   else if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE)
      pipeline_stats = pipeline_statistic_convert(q->index);

   VkQueryType vk_query_type = q->vkqtype;
   /* if xfb is active, we need to use an xfb query, otherwise we need pipeline statistics */
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && idx == 1) {
      vk_query_type = VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
      pipeline_stats = 0;
   }

   struct zink_screen *screen = zink_screen(ctx->base.screen);
   /* reuse any pool of the right type; capacity exhaustion is handled by
    * the caller (query_pool_get_range), not here
    */
   list_for_each_entry(struct zink_query_pool, pool, &ctx->query_pools, list) {
      if (pool->vk_query_type == vk_query_type) {
         if (vk_query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
            if (pool->pipeline_stats == pipeline_stats)
               return pool;
         } else
            return pool;
      }
   }

   struct zink_query_pool *new_pool = CALLOC_STRUCT(zink_query_pool);
   if (!new_pool)
      return NULL;

   new_pool->vk_query_type = vk_query_type;
   new_pool->pipeline_stats = pipeline_stats;

   VkQueryPoolCreateInfo pool_create = {0};
   pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
   pool_create.queryType = vk_query_type;
   pool_create.queryCount = NUM_QUERIES;
   pool_create.pipelineStatistics = pipeline_stats;

   VkResult status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &new_pool->query_pool);
   if (status != VK_SUCCESS) {
      mesa_loge("ZINK: vkCreateQueryPool failed (%s)", vk_Result_to_str(status));
      FREE(new_pool);
      return NULL;
   }

   list_addtail(&new_pool->list, &ctx->query_pools);
   return new_pool;
}
225 
226 static void
227 update_qbo(struct zink_context *ctx, struct zink_query *q);
228 static void
229 reset_qbos(struct zink_context *ctx, struct zink_query *q);
230 
231 
232 static bool
is_emulated_primgen(const struct zink_query * q)233 is_emulated_primgen(const struct zink_query *q)
234 {
235    return q->type == PIPE_QUERY_PRIMITIVES_GENERATED &&
236           q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT;
237 }
238 
/* emulated primgen needs two pools (statistics + xfb); everything else one */
static inline unsigned
get_num_query_pools(struct zink_query *q)
{
   return is_emulated_primgen(q) ? 2 : 1;
}
246 
247 static inline unsigned
get_num_queries(struct zink_query * q)248 get_num_queries(struct zink_query *q)
249 {
250    if (is_emulated_primgen(q))
251       return 2;
252    if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
253       return PIPE_MAX_VERTEX_STREAMS;
254    return 1;
255 }
256 
/* number of 64-bit result words each vk query of this type produces */
static inline unsigned
get_num_results(struct zink_query *q)
{
   /* native primitives-generated queries return a single counter */
   if (q->type < PIPE_QUERY_DRIVER_SPECIFIC &&
       q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      return 1;
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      return 1;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      /* xfb queries capture (numPrimitivesWritten, numPrimitivesNeeded) pairs */
      return 2;
   default:
      debug_printf("unknown query: %s\n",
                   util_str_query_type(q->type, true));
      unreachable("zink: unknown query type");
   }
}
282 
/* convert a raw vk timestamp to nanoseconds in place: mask off the bits
 * the queue does not implement, then scale by the device's tick period
 */
static void
timestamp_to_nanoseconds(struct zink_screen *screen, uint64_t *timestamp)
{
   /* The number of valid bits in a timestamp value is determined by
    * the VkQueueFamilyProperties::timestampValidBits property of the queue on which the timestamp is written.
    * - 17.5. Timestamp Queries
    */
   if (screen->timestamp_valid_bits < 64)
      *timestamp &= (1ull << screen->timestamp_valid_bits) - 1;

   /* The number of nanoseconds it takes for a timestamp value to be incremented by 1
    * can be obtained from VkPhysicalDeviceLimits::timestampPeriod
    * - 17.5. Timestamp Queries
    */
   *timestamp *= (double)screen->info.props.limits.timestampPeriod;
}
299 
/* map a gallium query type to the vulkan query type that implements it;
 * *precise is set only for occlusion counters, which need exact sample
 * counts rather than a boolean
 */
static VkQueryType
convert_query_type(struct zink_screen *screen, enum pipe_query_type query_type, bool *precise)
{
   *precise = false;
   switch (query_type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      *precise = true;
      FALLTHROUGH;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      return VK_QUERY_TYPE_OCCLUSION;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
      return VK_QUERY_TYPE_TIMESTAMP;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      /* prefer the native extension; otherwise emulate via pipeline stats */
      return screen->info.have_EXT_primitives_generated_query ?
             VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT :
             VK_QUERY_TYPE_PIPELINE_STATISTICS;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      return VK_QUERY_TYPE_PIPELINE_STATISTICS;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      return VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
   default:
      debug_printf("unknown query: %s\n",
                   util_str_query_type(query_type, true));
      unreachable("zink: unknown query type");
   }
}
330 
331 static bool
needs_stats_list(struct zink_query * query)332 needs_stats_list(struct zink_query *query)
333 {
334    return is_emulated_primgen(query) ||
335           query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
336           query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
337 }
338 
339 static bool
is_time_query(struct zink_query * query)340 is_time_query(struct zink_query *query)
341 {
342    return query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIME_ELAPSED;
343 }
344 
345 static bool
is_so_overflow_query(struct zink_query * query)346 is_so_overflow_query(struct zink_query *query)
347 {
348    return query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
349 }
350 
351 static bool
is_bool_query(struct zink_query * query)352 is_bool_query(struct zink_query *query)
353 {
354    return is_so_overflow_query(query) ||
355           query->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
356           query->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE ||
357           query->type == PIPE_QUERY_GPU_FINISHED;
358 }
359 
/* ensure the query has a qbo available to write results into; allocates
 * one staging buffer per backing vk query, each sized for the maximum
 * possible result count. returns false on allocation failure.
 */
static bool
qbo_append(struct pipe_screen *screen, struct zink_query *query)
{
   /* skip allocation if the current qbo still has a successor to rotate to */
   if (query->curr_qbo && query->curr_qbo->list.next)
      return true;
   struct zink_query_buffer *qbo = CALLOC_STRUCT(zink_query_buffer);
   if (!qbo)
      return false;
   int num_buffers = get_num_queries(query);

   for (unsigned i = 0; i < num_buffers; i++) {
      qbo->buffers[i] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
                                           PIPE_USAGE_STAGING,
                                           /* this is the maximum possible size of the results in a given buffer */
                                           (query->type == PIPE_QUERY_TIMESTAMP ? 1 : NUM_QUERIES) * get_num_results(query) * sizeof(uint64_t));
      if (!qbo->buffers[i])
         goto fail;
   }
   list_addtail(&qbo->list, &query->buffers);

   return true;
fail:
   /* CALLOC zeroed the array, so unallocated slots are NULL and safe to release */
   for (unsigned i = 0; i < num_buffers; i++)
      pipe_resource_reference(&qbo->buffers[i], NULL);
   FREE(qbo);
   return false;
}
387 
388 static void
unref_vk_pool(struct zink_context * ctx,struct zink_query_pool * pool)389 unref_vk_pool(struct zink_context *ctx, struct zink_query_pool *pool)
390 {
391    if (!pool || --pool->refcount)
392       return;
393    util_dynarray_append(&ctx->batch.state->dead_querypools, VkQueryPool, pool->query_pool);
394    if (list_is_linked(&pool->list))
395       list_del(&pool->list);
396    FREE(pool);
397 }
398 
399 static void
unref_vk_query(struct zink_context * ctx,struct zink_vk_query * vkq)400 unref_vk_query(struct zink_context *ctx, struct zink_vk_query *vkq)
401 {
402    if (!vkq)
403       return;
404    unref_vk_pool(ctx, vkq->pool);
405    vkq->refcount--;
406    if (vkq->refcount == 0)
407       FREE(vkq);
408 }
409 
/* free a query and everything it owns; callers must guarantee the batch
 * that used the query has already completed (see the assert)
 */
static void
destroy_query(struct zink_context *ctx, struct zink_query *query)
{
   ASSERTED struct zink_screen *screen = zink_screen(ctx->base.screen);
   assert(zink_screen_usage_check_completion(screen, query->batch_uses));
   struct zink_query_buffer *qbo, *next;

   struct zink_query_start *starts = query->starts.data;
   /* walk the full capacity, not just the used size: query_pool_get_range
    * zeroes newly grown storage, so trailing slots hold NULL vkq pointers
    * which unref_vk_query ignores
    */
   unsigned num_starts = query->starts.capacity / sizeof(struct zink_query_start);
   for (unsigned j = 0; j < num_starts; j++) {
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         unref_vk_query(ctx, starts[j].vkq[i]);
      }
   }

   util_dynarray_fini(&query->starts);
   LIST_FOR_EACH_ENTRY_SAFE(qbo, next, &query->buffers, list) {
      for (unsigned i = 0; i < ARRAY_SIZE(qbo->buffers); i++)
         pipe_resource_reference(&qbo->buffers[i], NULL);
      FREE(qbo);
   }
   pipe_resource_reference((struct pipe_resource**)&query->predicate, NULL);
   FREE(query);
}
434 
435 static void
reset_qbo(struct zink_query * q)436 reset_qbo(struct zink_query *q)
437 {
438    q->curr_qbo = list_first_entry(&q->buffers, struct zink_query_buffer, list);
439    q->curr_qbo->num_results = 0;
440 }
441 
/* populate the query's current start with vk queries: append a new start
 * (timestamps reuse their single start), then for each backing query either
 * share the context's active xfb query for that stream or allocate a fresh
 * id out of a (possibly new) pool
 */
static void
query_pool_get_range(struct zink_context *ctx, struct zink_query *q)
{
   bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP;
   struct zink_query_start *start;
   int num_queries = get_num_queries(q);
   if (!is_timestamp || get_num_starts(q) == 0) {
      size_t size = q->starts.capacity;
      start = util_dynarray_grow(&q->starts, struct zink_query_start, 1);
      if (size != q->starts.capacity) {
         /* when resizing, always zero the new data to avoid garbage */
         uint8_t *data = q->starts.data;
         memset(data + size, 0, q->starts.capacity - size);
      }
   } else {
      /* timestamp: overwrite the existing (single) start */
      start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
   }
   start->data = 0;

   unsigned num_pools = get_num_query_pools(q);
   for (unsigned i = 0; i < num_queries; i++) {
      int pool_idx = num_pools > 1 ? i : 0;
      /* try and find the active query for this */
      struct zink_vk_query *vkq;
      int xfb_idx = num_queries == 4 ? i : q->index;
      if ((q->vkqtype == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT ||
           (pool_idx == 1)) && ctx->curr_xfb_queries[xfb_idx]) {
         /* share the already-active xfb query for this stream */
         vkq = ctx->curr_xfb_queries[xfb_idx];
         vkq->refcount++;
         vkq->pool->refcount++;
      } else {
         struct zink_query_pool *pool = find_or_allocate_qp(ctx, q, pool_idx);
         /* pool exhausted: unlink it from the context so the next lookup
          * creates a fresh one; the old pool is freed via unref_vk_pool
          * once its last query is released
          */
         if (pool->last_range == NUM_QUERIES) {
            list_del(&pool->list);
            pool = find_or_allocate_qp(ctx, q, pool_idx);
         }
         vkq = CALLOC_STRUCT(zink_vk_query);
         if (!vkq) {
            mesa_loge("ZINK: failed to allocate vkq!");
            return;
         }

         pool->refcount++;
         vkq->refcount = 1;
         vkq->needs_reset = true;
         vkq->pool = pool;
         vkq->started = false;
         vkq->query_id = pool->last_range++;
      }
      /* release whatever occupied this slot before (timestamp reuse path) */
      unref_vk_query(ctx, start->vkq[i]);
      start->vkq[i] = vkq;
   }
}
495 
/* pipe_context::create_query: allocate a zink_query and, for vk-backed
 * types, its initial qbo. returns NULL on failure.
 */
static struct pipe_query *
zink_create_query(struct pipe_context *pctx,
                  unsigned query_type, unsigned index)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = CALLOC_STRUCT(zink_query);

   if (!query)
      return NULL;
   list_inithead(&query->buffers);

   query->index = index;
   query->type = query_type;

   /* driver-specific queries (e.g. render-passes) need no vk backing */
   if (query->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return (struct pipe_query *)query;

   /* these types are resolved without a vk query object */
   if (query->type == PIPE_QUERY_GPU_FINISHED || query->type == PIPE_QUERY_TIMESTAMP_DISJOINT)
      return (struct pipe_query *)query;
   query->vkqtype = convert_query_type(screen, query_type, &query->precise);
   if (query->vkqtype == -1)
      return NULL;

   util_dynarray_init(&query->starts, NULL);

   assert(!query->precise || query->vkqtype == VK_QUERY_TYPE_OCCLUSION);

   /* use emulated path for drivers without full support */
   if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT && index &&
       !screen->info.primgen_feats.primitivesGeneratedQueryWithNonZeroStreams)
      query->vkqtype = VK_QUERY_TYPE_PIPELINE_STATISTICS;

   if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      query->needs_rast_discard_workaround = !screen->info.primgen_feats.primitivesGeneratedQueryWithRasterizerDiscard;
   } else if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED) {
      query->needs_rast_discard_workaround = true;
   }

   if (!qbo_append(pctx->screen, query))
      goto fail;
   struct zink_batch *batch = &zink_context(pctx)->batch;
   batch->has_work = true;
   query->needs_reset = true;
   query->predicate_dirty = true;
   if (query->type == PIPE_QUERY_TIMESTAMP) {
      query->active = true;
      /* defer pool reset until end_query since we're guaranteed to be threadsafe then */
      reset_qbo(query);
   }
   return (struct pipe_query *)query;
fail:
   destroy_query(zink_context(pctx), query);
   return NULL;
}
550 
551 static void
zink_destroy_query(struct pipe_context * pctx,struct pipe_query * q)552 zink_destroy_query(struct pipe_context *pctx,
553                    struct pipe_query *q)
554 {
555    struct zink_query *query = (struct zink_query *)q;
556 
557    /* only destroy if this query isn't active on any batches,
558     * otherwise just mark dead and wait
559     */
560    if (query->batch_uses) {
561       query->dead = true;
562       return;
563    }
564 
565    destroy_query(zink_context(pctx), query);
566 }
567 
568 void
zink_prune_query(struct zink_batch_state * bs,struct zink_query * query)569 zink_prune_query(struct zink_batch_state *bs, struct zink_query *query)
570 {
571    if (!zink_batch_usage_matches(query->batch_uses, bs))
572       return;
573    query->batch_uses = NULL;
574    if (query->dead)
575       destroy_query(bs->ctx, query);
576 }
577 
/* fold mapped qbo contents into *result: one group of get_num_results()
 * uint64 words per recorded start. xfb_results supplies the secondary
 * (xfb) buffer used by emulated primitives-generated queries.
 */
static void
check_query_results(struct zink_query *query, union pipe_query_result *result,
                    int num_starts, uint64_t *results, uint64_t *xfb_results)
{
   uint64_t last_val = 0;
   int result_size = get_num_results(query);
   int idx = 0;
   util_dynarray_foreach(&query->starts, struct zink_query_start, start) {
      unsigned i = idx * result_size;
      idx++;
      switch (query->type) {
      case PIPE_QUERY_OCCLUSION_PREDICATE:
      case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      case PIPE_QUERY_GPU_FINISHED:
         result->b |= results[i] != 0;
         break;

      case PIPE_QUERY_TIME_ELAPSED:
      case PIPE_QUERY_TIMESTAMP:
         /* the application can sum the differences between all N queries to determine the total execution time.
          * - 17.5. Timestamp Queries
          */
         if (query->type != PIPE_QUERY_TIME_ELAPSED || i)
            result->u64 += results[i] - last_val;
         last_val = results[i];
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
         result->u64 += results[i];
         break;
      case PIPE_QUERY_PRIMITIVES_GENERATED:
         if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
            result->u64 += results[i];
         else if (start->have_xfb || query->index)
            /* xfb active (or non-zero stream): use the xfb query's second word */
            result->u64 += xfb_results[i + 1];
         else
            /* if a given draw had a geometry shader, we need to use the first result */
            result->u64 += results[i + !start->have_gs];
         break;
      case PIPE_QUERY_PRIMITIVES_EMITTED:
         /* A query pool created with this type will capture 2 integers -
          * numPrimitivesWritten and numPrimitivesNeeded -
          * for the specified vertex stream output from the last vertex processing stage.
          * - from VK_EXT_transform_feedback spec
          */
         result->u64 += results[i];
         break;
      case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         /* A query pool created with this type will capture 2 integers -
          * numPrimitivesWritten and numPrimitivesNeeded -
          * for the specified vertex stream output from the last vertex processing stage.
          * - from VK_EXT_transform_feedback spec
          */
         if (start->have_xfb)
            result->b |= results[i] != results[i + 1];
         break;
      case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
         switch (query->index) {
         case PIPE_STAT_QUERY_IA_VERTICES:
            /* halve the vertex count for starts recorded as line loops */
            result->u64 += start->was_line_loop ? results[i] / 2 : results[i];
            break;
         default:
            result->u64 += results[i];
            break;
         }
         break;

      default:
         debug_printf("unhandled query type: %s\n",
                      util_str_query_type(query->type, true));
         unreachable("unexpected query type");
      }
   }
}
652 
/* map each qbo's buffers and accumulate their contents into *result;
 * returns false only when a buffer map fails (e.g. PIPE_MAP_DONTBLOCK
 * with wait==false)
 */
static bool
get_query_result(struct pipe_context *pctx,
                      struct pipe_query *q,
                      bool wait,
                      union pipe_query_result *result)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query *)q;
   unsigned flags = PIPE_MAP_READ;

   if (!wait)
      flags |= PIPE_MAP_DONTBLOCK;
   if (query->base.flushed)
      /* this is not a context-safe operation; ensure map doesn't use slab alloc */
      flags |= PIPE_MAP_THREAD_SAFE;

   util_query_clear_result(result, query->type);

   int num_starts = get_num_starts(query);
   /* no results: return zero */
   if (!num_starts)
      return true;
   int result_size = get_num_results(query) * sizeof(uint64_t);
   int num_maps = get_num_queries(query);

   struct zink_query_buffer *qbo;
   struct pipe_transfer *xfer[PIPE_MAX_VERTEX_STREAMS] = { 0 };
   LIST_FOR_EACH_ENTRY(qbo, &query->buffers, list) {
      uint64_t *results[PIPE_MAX_VERTEX_STREAMS] = { NULL, NULL };
      bool is_timestamp = query->type == PIPE_QUERY_TIMESTAMP;
      if (!qbo->num_results)
         continue;

      /* map one buffer per vk query backing this gallium query */
      for (unsigned i = 0; i < num_maps; i++) {
         results[i] = pipe_buffer_map_range(pctx, qbo->buffers[i], 0,
                                            (is_timestamp ? 1 : qbo->num_results) * result_size, flags, &xfer[i]);
         if (!results[i]) {
            if (wait)
               debug_printf("zink: qbo read failed!");
            goto fail;
         }
      }
      if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
         /* check each vertex stream, stopping once an overflow is seen */
         for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS && !result->b; i++) {
            check_query_results(query, result, num_starts, results[i], NULL);
         }
      } else
         check_query_results(query, result, num_starts, results[0], results[1]);

      for (unsigned i = 0 ; i < num_maps; i++)
         pipe_buffer_unmap(pctx, xfer[i]);

      /* if overflow is detected we can stop */
      if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE && result->b)
         break;
   }

   if (is_time_query(query))
      timestamp_to_nanoseconds(screen, &result->u64);

   return true;
fail:
   /* unmap whatever was successfully mapped before the failure */
   for (unsigned i = 0 ; i < num_maps; i++)
      if (xfer[i])
         pipe_buffer_unmap(pctx, xfer[i]);
   return false;
}
720 
/* resolve a query result on the CPU (blocking) and write it into pres at
 * offset, sized and clamped according to result_type; bool queries write 0/1
 */
static void
force_cpu_read(struct zink_context *ctx, struct pipe_query *pquery, enum pipe_query_value_type result_type, struct pipe_resource *pres, unsigned offset)
{
   struct pipe_context *pctx = &ctx->base;
   unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
   struct zink_query *query = (struct zink_query*)pquery;
   union pipe_query_result result = {0};

   /* flush pending results into the qbos before reading them back */
   if (query->needs_update)
      update_qbo(ctx, query);

   bool success = get_query_result(pctx, pquery, true, &result);
   if (!success) {
      debug_printf("zink: getting query result failed\n");
      return;
   }

   if (result_type <= PIPE_QUERY_TYPE_U32) {
      uint32_t u32;
      uint32_t limit;
      if (result_type == PIPE_QUERY_TYPE_I32)
         limit = INT_MAX;
      else
         limit = UINT_MAX;
      if (is_bool_query(query))
         u32 = result.b;
      else
         /* clamp the 64-bit counter to the destination's range */
         u32 = MIN2(limit, result.u64);
      tc_buffer_write(pctx, pres, offset, result_size, &u32);
   } else {
      uint64_t u64;
      if (is_bool_query(query))
         u64 = result.b;
      else
         u64 = result.u64;
      tc_buffer_write(pctx, pres, offset, result_size, &u64);
   }
}
759 
/* record a GPU-side vkCmdCopyQueryPoolResults of num_results results into
 * res at offset; flags select 32/64-bit words and optional availability
 */
static void
copy_pool_results_to_buffer(struct zink_context *ctx, struct zink_query *query, VkQueryPool pool,
                            unsigned query_id, struct zink_resource *res, unsigned offset,
                            int num_results, VkQueryResultFlags flags)
{
   struct zink_batch *batch = &ctx->batch;
   unsigned type_size = (flags & VK_QUERY_RESULT_64_BIT) ? sizeof(uint64_t) : sizeof(uint32_t);
   unsigned base_result_size = get_num_results(query) * type_size;
   unsigned result_size = base_result_size * num_results;
   /* availability is written as one extra word per result group */
   if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
      result_size += type_size;

   bool marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "update_qbo(%s: id=%u, num_results=%d)", vk_QueryType_to_str(query->vkqtype), query_id, num_results);

   /* the copy must be recorded outside of a render pass */
   zink_batch_no_rp(ctx);
   /* if it's a single query that doesn't need special handling, we can copy it and be done */
   zink_batch_reference_resource_rw(batch, res, true);
   res->obj->access = VK_ACCESS_TRANSFER_WRITE_BIT;
   res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
   util_range_add(&res->base.b, &res->valid_buffer_range, offset, offset + result_size);
   assert(query_id < NUM_QUERIES);
   res->obj->unordered_read = res->obj->unordered_write = false;
   VKCTX(CmdCopyQueryPoolResults)(batch->state->cmdbuf, pool, query_id, num_results, res->obj->buffer,
                                  offset, base_result_size, flags);
   zink_cmd_debug_marker_end(ctx, batch->state->cmdbuf, marker);
}
786 
787 static void
copy_results_to_buffer(struct zink_context * ctx,struct zink_query * query,struct zink_resource * res,unsigned offset,int num_results,VkQueryResultFlags flags)788 copy_results_to_buffer(struct zink_context *ctx, struct zink_query *query, struct zink_resource *res, unsigned offset, int num_results, VkQueryResultFlags flags)
789 {
790    struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
791    copy_pool_results_to_buffer(ctx, query, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id, res, offset, num_results, flags);
792 }
793 
794 
795 static void
reset_query_range(struct zink_context * ctx,struct zink_query * q)796 reset_query_range(struct zink_context *ctx, struct zink_query *q)
797 {
798    int num_queries = get_num_queries(q);
799    struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
800    for (unsigned i = 0; i < num_queries; i++) {
801       reset_vk_query_pool(ctx, start->vkq[i]);
802    }
803 }
804 
805 static void
reset_qbos(struct zink_context * ctx,struct zink_query * q)806 reset_qbos(struct zink_context *ctx, struct zink_query *q)
807 {
808    if (q->needs_update)
809       update_qbo(ctx, q);
810 
811    q->needs_reset = false;
812    /* create new qbo for non-timestamp queries:
813     * timestamp queries should never need more than 2 entries in the qbo
814     */
815    if (q->type == PIPE_QUERY_TIMESTAMP)
816       return;
817    if (qbo_append(ctx->base.screen, q))
818       reset_qbo(q);
819    else
820       debug_printf("zink: qbo alloc failed on reset!");
821 }
822 
/* Byte offset within the qbo where the most recent start's results live:
 * results are stored as 64-bit values, one group per start.
 */
static inline unsigned
get_buffer_offset(struct zink_query *q)
{
   unsigned last_start = get_num_starts(q) - 1;
   return last_start * get_num_results(q) * sizeof(uint64_t);
}
828 
/* Flush pending vulkan query results into the query's qbo.
 *
 * Walks every start recorded since the last flush (from q->start_offset)
 * and, for each underlying vulkan query, batches runs of consecutive
 * query ids from the same pool into a single vkCmdCopyQueryPoolResults.
 */
static void
update_qbo(struct zink_context *ctx, struct zink_query *q)
{
   struct zink_query_buffer *qbo = q->curr_qbo;
   unsigned num_starts = get_num_starts(q);
   struct zink_query_start *starts = q->starts.data;
   bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP;
   /* timestamp queries just write to offset 0 always */
   int num_queries = get_num_queries(q);
   unsigned num_results = qbo->num_results;
   for (unsigned i = 0; i < num_queries; i++) {
      unsigned start_offset = q->start_offset;
      while (start_offset < num_starts) {
         unsigned num_merged_copies = 0;
         VkQueryPool qp = starts[start_offset].vkq[i]->pool->query_pool;
         unsigned base_id = starts[start_offset].vkq[i]->query_id;
         /* iterate over all the starts to see how many can be merged */
         for (unsigned j = start_offset; j < num_starts; j++, num_merged_copies++) {
            if (starts[j].vkq[i]->pool->query_pool != qp || starts[j].vkq[i]->query_id != base_id + num_merged_copies)
               break;
         }
         assert(num_merged_copies);
         /* qbo layout: 64-bit values, one result group per start */
         unsigned cur_offset = start_offset * get_num_results(q) * sizeof(uint64_t);
         unsigned offset = is_timestamp ? 0 : cur_offset;
         copy_pool_results_to_buffer(ctx, q, starts[start_offset].vkq[i]->pool->query_pool, starts[start_offset].vkq[i]->query_id,
                                    zink_resource(qbo->buffers[i]),
                                    offset,
                                    num_merged_copies,
                                    /*
                                       there is an implicit execution dependency from
                                       each such query command to all query commands previously submitted to the same queue. There
                                       is one significant exception to this; if the flags parameter of vkCmdCopyQueryPoolResults does not
                                       include VK_QUERY_RESULT_WAIT_BIT, execution of vkCmdCopyQueryPoolResults may happen-before
                                       the results of vkCmdEndQuery are available.

                                    * - Chapter 18. Queries
                                    */
                                    VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
         if (!is_timestamp)
            q->curr_qbo->num_results += num_merged_copies;
         start_offset += num_merged_copies;
      }
   }
   /* advance past the starts flushed above so they aren't copied again */
   q->start_offset += q->curr_qbo->num_results - num_results;


   if (is_timestamp)
      q->curr_qbo->num_results = 1;

   q->needs_update = false;
}
880 
/* Record the vulkan commands that start 'q' on 'batch'.
 *
 * Allocates and resets a fresh query range, then begins the underlying
 * vulkan query (indexed variants for xfb/primgen queries).  CS-invocation
 * stat queries cannot start inside a renderpass and are deferred instead.
 */
static void
begin_query(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q)
{
   VkQueryControlFlags flags = 0;

   /* disjoint and driver-specific queries have no vulkan query to start */
   if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT || q->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && q->index == PIPE_STAT_QUERY_CS_INVOCATIONS && ctx->batch.in_rp) {
      /* refuse to start CS queries in renderpasses */
      if (!list_is_linked(&q->active_list))
         list_addtail(&q->active_list, &ctx->suspended_queries);
      q->suspended = true;
      return;
   }

   zink_flush_dgc_if_enabled(ctx);

   update_query_id(ctx, q);
   q->predicate_dirty = true;
   if (q->needs_reset)
      reset_qbos(ctx, q);
   reset_query_range(ctx, q);
   q->active = true;
   batch->has_work = true;

   struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
   if (q->type == PIPE_QUERY_TIME_ELAPSED) {
      /* elapsed time = end-of-query timestamp minus this start timestamp */
      VKCTX(CmdWriteTimestamp)(batch->state->cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);
      if (!batch->in_rp)
         update_qbo(ctx, q);
      zink_batch_usage_set(&q->batch_uses, batch->state);
      _mesa_set_add(&batch->state->active_queries, q);
   }
   /* ignore the rest of begin_query for timestamps */
   if (is_time_query(q))
      return;

   /* A query must either begin and end inside the same subpass of a render pass
      instance, or must both begin and end outside of a render pass instance
      (i.e. contain entire render pass instances).
      - 18.2. Query Operation
    */
   q->started_in_rp = ctx->batch.in_rp;

   if (q->precise)
      flags |= VK_QUERY_CONTROL_PRECISE_BIT;

   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
       is_emulated_primgen(q) ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      /* xfb-backed queries prefer the secondary vk query when one exists */
      struct zink_vk_query *vkq = start->vkq[1] ? start->vkq[1] : start->vkq[0];
      assert(!ctx->curr_xfb_queries[q->index] || ctx->curr_xfb_queries[q->index] == vkq);
      ctx->curr_xfb_queries[q->index] = vkq;

      begin_vk_query_indexed(ctx, vkq, q->index, flags);
   } else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      /* "any" overflow needs one query per vertex stream */
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         assert(!ctx->curr_xfb_queries[i] || ctx->curr_xfb_queries[i] == start->vkq[i]);
         ctx->curr_xfb_queries[i] = start->vkq[i];

         begin_vk_query_indexed(ctx, start->vkq[i], i, flags);
      }
   } else if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      begin_vk_query_indexed(ctx, start->vkq[0], q->index, flags);
   }
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT && q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      VKCTX(CmdBeginQuery)(batch->state->cmdbuf, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id, flags);
   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && q->index == PIPE_STAT_QUERY_IA_VERTICES)  {
      assert(!ctx->vertices_query);
      ctx->vertices_query = q;
   }
   if (needs_stats_list(q))
      list_addtail(&q->stats_list, &ctx->primitives_generated_queries);
   zink_batch_usage_set(&q->batch_uses, batch->state);
   _mesa_set_add(&batch->state->active_queries, q);
   if (q->needs_rast_discard_workaround) {
      /* primgen counting requires rasterization: force discard off */
      ctx->primitives_generated_active = true;
      if (zink_set_rasterizer_discard(ctx, true))
         zink_set_null_fs(ctx);
   }
}
963 
964 static bool
zink_begin_query(struct pipe_context * pctx,struct pipe_query * q)965 zink_begin_query(struct pipe_context *pctx,
966                  struct pipe_query *q)
967 {
968    struct zink_query *query = (struct zink_query *)q;
969    struct zink_context *ctx = zink_context(pctx);
970    struct zink_batch *batch = &ctx->batch;
971 
972    /* drop all past results */
973    reset_qbo(query);
974 
975    if (query->type < PIPE_QUERY_DRIVER_SPECIFIC && query->vkqtype == VK_QUERY_TYPE_OCCLUSION)
976       ctx->occlusion_query_active = true;
977    if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
978       ctx->fs_query_active = true;
979 
980    query->predicate_dirty = true;
981 
982    util_dynarray_clear(&query->starts);
983    query->start_offset = 0;
984 
985    if (batch->in_rp) {
986       begin_query(ctx, batch, query);
987    } else {
988       /* never directly start queries out of renderpass, always defer */
989       list_addtail(&query->active_list, &ctx->suspended_queries);
990       query->suspended = true;
991       if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
992          ctx->primitives_generated_suspended = query->needs_rast_discard_workaround;
993    }
994 
995    return true;
996 }
997 
998 static void
update_query_id(struct zink_context * ctx,struct zink_query * q)999 update_query_id(struct zink_context *ctx, struct zink_query *q)
1000 {
1001    query_pool_get_range(ctx, q);
1002    ctx->batch.has_work = true;
1003    q->has_draws = false;
1004 }
1005 
/* Record the vulkan commands that stop 'q' on 'batch'.
 *
 * Mirrors begin_query(): indexed end for xfb/primgen queries,
 * vkCmdEndQuery otherwise, plus bookkeeping teardown.  A query must end
 * in the same renderpass state it began in (see 18.2 Query Operation).
 */
static void
end_query(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q)
{
   if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT || q->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return;

   zink_flush_dgc_if_enabled(ctx);

   ASSERTED struct zink_query_buffer *qbo = q->curr_qbo;
   assert(qbo);
   assert(!is_time_query(q));
   q->active = false;
   /* queries must begin and end on the same side of a renderpass boundary */
   assert(q->started_in_rp == batch->in_rp);
   struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);

   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
       is_emulated_primgen(q) ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      /* must end the same vk query that begin_query started (vkq[1] if present) */
      struct zink_vk_query *vkq = start->vkq[1] ? start->vkq[1] : start->vkq[0];

      end_vk_query_indexed(ctx, vkq, q->index);
      ctx->curr_xfb_queries[q->index] = NULL;
   }
   else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         end_vk_query_indexed(ctx, start->vkq[i], i);
         ctx->curr_xfb_queries[i] = NULL;
      }
   } else if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      end_vk_query_indexed(ctx, start->vkq[0], q->index);
   }
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT &&
       q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT && !is_time_query(q))
      VKCTX(CmdEndQuery)(batch->state->cmdbuf, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_IA_VERTICES)
      ctx->vertices_query = NULL;

   if (needs_stats_list(q))
      list_delinit(&q->stats_list);

   /* results now exist on the gpu; qbo must be refreshed before readback */
   q->needs_update = true;
   if (q->needs_rast_discard_workaround) {
      ctx->primitives_generated_active = false;
      if (zink_set_rasterizer_discard(ctx, false))
         zink_set_null_fs(ctx);
   }
}
1055 
1056 static bool
zink_end_query(struct pipe_context * pctx,struct pipe_query * q)1057 zink_end_query(struct pipe_context *pctx,
1058                struct pipe_query *q)
1059 {
1060    struct zink_context *ctx = zink_context(pctx);
1061    struct zink_query *query = (struct zink_query *)q;
1062    struct zink_batch *batch = &ctx->batch;
1063 
1064    if (query->type == PIPE_QUERY_TIMESTAMP_DISJOINT || query->type >= PIPE_QUERY_DRIVER_SPECIFIC)
1065       return true;
1066 
1067    if (query->type == PIPE_QUERY_GPU_FINISHED) {
1068       pctx->flush(pctx, &query->fence, PIPE_FLUSH_DEFERRED);
1069       return true;
1070    }
1071 
1072    /* FIXME: this can be called from a thread, but it needs to write to the cmdbuf */
1073    threaded_context_unwrap_sync(pctx);
1074 
1075    if (query->vkqtype == VK_QUERY_TYPE_OCCLUSION)
1076       ctx->occlusion_query_active = true;
1077    if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
1078       ctx->fs_query_active = true;
1079 
1080    bool unset_null_fs = query->type == PIPE_QUERY_PRIMITIVES_GENERATED && (ctx->primitives_generated_suspended || ctx->primitives_generated_active);
1081    if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
1082       ctx->primitives_generated_suspended = false;
1083 
1084    if (list_is_linked(&query->stats_list))
1085       list_delinit(&query->stats_list);
1086    if (query->suspended) {
1087       list_delinit(&query->active_list);
1088       query->suspended = false;
1089    }
1090    if (is_time_query(query)) {
1091       update_query_id(ctx, query);
1092       if (query->needs_reset)
1093          reset_qbos(ctx, query);
1094       reset_query_range(ctx, query);
1095       struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
1096       VKCTX(CmdWriteTimestamp)(batch->state->cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
1097                                start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);
1098       zink_batch_usage_set(&query->batch_uses, batch->state);
1099       _mesa_set_add(&batch->state->active_queries, query);
1100       query->needs_update = true;
1101    } else if (query->active) {
1102       /* this should be a tc-optimized query end that doesn't split a renderpass */
1103       if (!query->started_in_rp)
1104          zink_batch_no_rp(ctx);
1105       end_query(ctx, batch, query);
1106    }
1107 
1108    if (unset_null_fs)
1109       zink_set_null_fs(ctx);
1110 
1111    return true;
1112 }
1113 
/* pipe_context::get_query_result hook.
 *
 * Handles the cpu-only query types inline, flushes pending qbo copies,
 * flushes the cmdbuf if the query's commands are still unsubmitted, then
 * reads back through get_query_result().  Returns false if !wait and the
 * result is not yet available.
 */
static bool
zink_get_query_result(struct pipe_context *pctx,
                      struct pipe_query *q,
                      bool wait,
                      union pipe_query_result *result)
{
   struct zink_query *query = (void*)q;
   struct zink_context *ctx = zink_context(pctx);

   if (query->type == PIPE_QUERY_TIMESTAMP_DISJOINT) {
      /* frequency derived from the device's timestampPeriod */
      result->timestamp_disjoint.frequency = zink_screen(pctx->screen)->info.props.limits.timestampPeriod * 1000000.0;
      result->timestamp_disjoint.disjoint = false;
      return true;
   }

   if (query->type == PIPE_QUERY_GPU_FINISHED) {
      struct pipe_screen *screen = pctx->screen;

      result->b = screen->fence_finish(screen, query->base.flushed ? NULL : pctx,
                                        query->fence, wait ? OS_TIMEOUT_INFINITE : 0);
      return result->b;
   }

   if (query->type == ZINK_QUERY_RENDER_PASSES) {
      /* HUD counter: reading it also resets it */
      result->u64 = ctx->hud.render_passes;
      ctx->hud.render_passes = 0;
      return true;
   }

   if (query->needs_update) {
      assert(!ctx->tc || !threaded_query(q)->flushed);
      update_qbo(ctx, query);
   }

   if (zink_batch_usage_is_unflushed(query->batch_uses)) {
      if (!threaded_query(q)->flushed)
         pctx->flush(pctx, NULL, 0);
      if (!wait)
         return false;
   }

   return get_query_result(pctx, q, wait, result);
}
1157 
1158 static void
suspend_query(struct zink_context * ctx,struct zink_query * query)1159 suspend_query(struct zink_context *ctx, struct zink_query *query)
1160 {
1161    /* if a query isn't active here then we don't need to reactivate it on the next batch */
1162    if (query->active && !is_time_query(query))
1163       end_query(ctx, &ctx->batch, query);
1164    if (query->needs_update && !ctx->batch.in_rp)
1165       update_qbo(ctx, query);
1166 }
1167 
/* Move every active query on the batch onto ctx->suspended_queries and
 * stop it.  'rp_only' restricts this to queries started inside the
 * current renderpass (used when only the renderpass is ending).
 */
static void
suspend_queries(struct zink_context *ctx, bool rp_only)
{
   set_foreach(&ctx->batch.state->active_queries, entry) {
      struct zink_query *query = (void*)entry->key;
      if (query->suspended || (rp_only && !query->started_in_rp))
         continue;
      if (query->active && !is_time_query(query)) {
         /* the fence is going to steal the set off the batch, so we have to copy
          * the active queries onto a list
          */
         list_addtail(&query->active_list, &ctx->suspended_queries);
         query->suspended = true;
         if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
            ctx->primitives_generated_suspended = query->needs_rast_discard_workaround;
      }
      suspend_query(ctx, query);
   }
}
1187 
/* Suspend every query active on the batch, regardless of where it started
 * (e.g. before cmdbuf submission).
 */
void
zink_suspend_queries(struct zink_context *ctx, struct zink_batch *batch)
{
   suspend_queries(ctx, false);
}
1193 
1194 void
zink_resume_queries(struct zink_context * ctx,struct zink_batch * batch)1195 zink_resume_queries(struct zink_context *ctx, struct zink_batch *batch)
1196 {
1197    struct zink_query *query, *next;
1198    LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
1199       list_delinit(&query->active_list);
1200       query->suspended = false;
1201       if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
1202          ctx->primitives_generated_suspended = false;
1203       if (query->needs_update && !ctx->batch.in_rp)
1204          update_qbo(ctx, query);
1205       begin_query(ctx, batch, query);
1206    }
1207 }
1208 
1209 void
zink_resume_cs_query(struct zink_context * ctx)1210 zink_resume_cs_query(struct zink_context *ctx)
1211 {
1212    struct zink_query *query, *next;
1213    LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
1214       if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_CS_INVOCATIONS) {
1215          list_delinit(&query->active_list);
1216          query->suspended = false;
1217          begin_query(ctx, &ctx->batch, query);
1218       }
1219    }
1220 }
1221 
/* Suspend only the queries that were started inside the current
 * renderpass (called when the renderpass is being ended).
 */
void
zink_query_renderpass_suspend(struct zink_context *ctx)
{
   suspend_queries(ctx, true);
}
1227 
/* Track per-draw shader state that affects primitive counting.
 *
 * primgen/vertex queries count differently depending on whether a
 * geometry shader or xfb is bound (and on line-loop emulation), so if
 * that state changed since a query's last recorded draw, all queries are
 * suspended and resumed to split them onto fresh vulkan queries, then the
 * new state is recorded on each query's latest start.
 */
void
zink_query_update_gs_states(struct zink_context *ctx)
{
   struct zink_query *query;
   bool suspendall = false;
   bool have_gs = !!ctx->gfx_stages[MESA_SHADER_GEOMETRY];
   bool have_xfb = !!ctx->num_so_targets;

   LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      assert(query->active);
      /* only split if the query already counted draws under the old state */
      if (query->has_draws) {
         if (last_start->have_gs != have_gs ||
             last_start->have_xfb != have_xfb) {
            suspendall = true;
         }
      }
   }

   if (ctx->vertices_query) {
      query = ctx->vertices_query;
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      assert(query->active);
      if (last_start->was_line_loop != ctx->was_line_loop) {
         suspendall = true;
      }
   }
   if (suspendall) {
     /* suspend/resume gives every affected query a new start */
     zink_suspend_queries(ctx, &ctx->batch);
     zink_resume_queries(ctx, &ctx->batch);
   }

   LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      last_start->have_gs = have_gs;
      last_start->have_xfb = have_xfb;
      query->has_draws = true;
   }
   if (ctx->vertices_query) {
      query = ctx->vertices_query;
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      last_start->was_line_loop = ctx->was_line_loop;
      query->has_draws = true;
   }
}
1273 
1274 static void
zink_set_active_query_state(struct pipe_context * pctx,bool enable)1275 zink_set_active_query_state(struct pipe_context *pctx, bool enable)
1276 {
1277    struct zink_context *ctx = zink_context(pctx);
1278    /* unordered blits already disable queries */
1279    if (ctx->unordered_blitting)
1280       return;
1281    ctx->queries_disabled = !enable;
1282 
1283    struct zink_batch *batch = &ctx->batch;
1284    if (ctx->queries_disabled)
1285       zink_suspend_queries(ctx, batch);
1286    else if (ctx->batch.in_rp)
1287       zink_resume_queries(ctx, batch);
1288 }
1289 
/* Ensure the query's qbo contains every result recorded so far. */
void
zink_query_sync(struct zink_context *ctx, struct zink_query *query)
{
   if (query->needs_update)
      update_qbo(ctx, query);
}
1296 
/* Begin EXT_conditional_rendering against the current render condition's
 * predicate buffer.  No-op if the extension is unavailable or conditional
 * rendering is already active.
 */
void
zink_start_conditional_render(struct zink_context *ctx)
{
   if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || ctx->render_condition.active)
      return;
   struct zink_batch *batch = &ctx->batch;
   VkConditionalRenderingFlagsEXT begin_flags = 0;
   if (ctx->render_condition.inverted)
      begin_flags = VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
   VkConditionalRenderingBeginInfoEXT begin_info = {0};
   begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
   begin_info.buffer = ctx->render_condition.query->predicate->obj->buffer;
   begin_info.flags = begin_flags;
   /* the predicate read must stay ordered against the copy that wrote it */
   ctx->render_condition.query->predicate->obj->unordered_read = false;
   VKCTX(CmdBeginConditionalRenderingEXT)(batch->state->cmdbuf, &begin_info);
   zink_batch_reference_resource_rw(batch, ctx->render_condition.query->predicate, false);
   ctx->render_condition.active = true;
}
1315 
1316 void
zink_stop_conditional_render(struct zink_context * ctx)1317 zink_stop_conditional_render(struct zink_context *ctx)
1318 {
1319    zink_flush_dgc_if_enabled(ctx);
1320    struct zink_batch *batch = &ctx->batch;
1321    zink_clear_apply_conditionals(ctx);
1322    if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || !ctx->render_condition.active)
1323       return;
1324    VKCTX(CmdEndConditionalRenderingEXT)(batch->state->cmdbuf);
1325    ctx->render_condition.active = false;
1326 }
1327 
/* pipe_context::render_condition hook.
 *
 * Copies (or cpu-reads, for emulated/multi-start queries) the query
 * result into a dedicated predicate buffer and arms conditional
 * rendering.  A NULL query disables conditional rendering.
 */
static void
zink_render_condition(struct pipe_context *pctx,
                      struct pipe_query *pquery,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_query *query = (struct zink_query *)pquery;
   zink_batch_no_rp(ctx);
   VkQueryResultFlagBits flags = 0;

   zink_flush_dgc_if_enabled(ctx);
   if (query == NULL) {
      /* force conditional clears if they exist */
      if (ctx->clears_enabled && !ctx->batch.in_rp)
         zink_batch_rp(ctx);
      zink_stop_conditional_render(ctx);
      ctx->render_condition_active = false;
      ctx->render_condition.query = NULL;
      return;
   }

   if (!query->predicate) {
      struct pipe_resource *pres;

      /* need to create a vulkan buffer to copy the data into */
      pres = pipe_buffer_create(pctx->screen, PIPE_BIND_QUERY_BUFFER, PIPE_USAGE_DEFAULT, sizeof(uint64_t));
      if (!pres)
         return;

      query->predicate = zink_resource(pres);
   }
   if (query->predicate_dirty) {
      struct zink_resource *res = query->predicate;

      if (mode == PIPE_RENDER_COND_WAIT || mode == PIPE_RENDER_COND_BY_REGION_WAIT)
         flags |= VK_QUERY_RESULT_WAIT_BIT;

      flags |= VK_QUERY_RESULT_64_BIT;
      int num_results = get_num_starts(query);
      if (num_results) {
         /* a single plain query can be copied gpu-side directly */
         if (!is_emulated_primgen(query) &&
            !is_so_overflow_query(query) &&
            num_results == 1) {
            copy_results_to_buffer(ctx, query, res, 0, num_results, flags);
         } else {
            /* these need special handling */
            force_cpu_read(ctx, pquery, PIPE_QUERY_TYPE_U32, &res->base.b, 0);
         }
      } else {
         /* query never started: predicate is trivially zero */
         uint64_t zero = 0;
         tc_buffer_write(pctx, &res->base.b, 0, sizeof(zero), &zero);
      }
      zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT, VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT);
      query->predicate_dirty = false;
   }
   ctx->render_condition.inverted = condition;
   ctx->render_condition_active = true;
   ctx->render_condition.query = query;
   if (ctx->batch.in_rp)
      zink_start_conditional_render(ctx);
}
1390 
/* pipe_context::get_query_result_resource hook: write a query result —
 * or its availability, for index == -1 — into 'pres' at 'offset' without
 * stalling when possible.  Results that need accumulation across starts
 * or type conversion fall back to a cpu readback.
 */
static void
zink_get_query_result_resource(struct pipe_context *pctx,
                               struct pipe_query *pquery,
                               enum pipe_query_flags flags,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *pres,
                               unsigned offset)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query*)pquery;
   struct zink_resource *res = zink_resource(pres);
   unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
   VkQueryResultFlagBits size_flags = result_type <= PIPE_QUERY_TYPE_U32 ? 0 : VK_QUERY_RESULT_64_BIT;
   unsigned num_queries = get_num_starts(query);

   /* it's possible that a query may have no data at all: write out zeroes to the buffer and return */
   uint64_t u64[4] = {0};
   /* availability value follows the result data at this offset */
   unsigned src_offset = result_size * get_num_results(query);
   if (!num_queries) {
      tc_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
      return;
   }

   if (index == -1) {
      /* VK_QUERY_RESULT_WITH_AVAILABILITY_BIT will ALWAYS write some kind of result data
       * in addition to the availability result, which is a problem if we're just trying to get availability data
       *
       * if we know that there's no valid buffer data in the preceding buffer range, then we can just
       * stomp on it with a glorious queued buffer copy instead of forcing a stall to manually write to the
       * buffer
       */

      VkQueryResultFlags flag = is_time_query(query) ? 0 : VK_QUERY_RESULT_PARTIAL_BIT;
      if (zink_batch_usage_check_completion(ctx, query->batch_uses)) {
         /* gpu work is done: read availability directly, no cmdbuf needed */
         struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
         unsigned query_id = start->vkq[0]->query_id;
         VkResult result = VKCTX(GetQueryPoolResults)(screen->dev, start->vkq[0]->pool->query_pool, query_id, 1,
                                   sizeof(u64), u64, 0, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
         if (result == VK_SUCCESS) {
            tc_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
            return;
         } else {
            mesa_loge("ZINK: vkGetQueryPoolResults failed (%s)", vk_Result_to_str(result));
         }
      }
      /* stage the full result+availability, then copy only the availability word */
      struct pipe_resource *staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, src_offset + result_size);
      copy_results_to_buffer(ctx, query, zink_resource(staging), 0, 1, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
      zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size * get_num_results(query), result_size);
      pipe_resource_reference(&staging, NULL);
      return;
   }

   /*
      there is an implicit execution dependency from
      each such query command to all query commands previously submitted to the same queue. There
      is one significant exception to this; if the flags parameter of vkCmdCopyQueryPoolResults does not
      include VK_QUERY_RESULT_WAIT_BIT, execution of vkCmdCopyQueryPoolResults may happen-before
      the results of vkCmdEndQuery are available.

    * - Chapter 18. Queries
    */
   size_flags |= VK_QUERY_RESULT_WAIT_BIT;
   if (!is_time_query(query) && !is_bool_query(query)) {
      if (num_queries == 1 && !is_emulated_primgen(query) &&
                              query->type != PIPE_QUERY_PRIMITIVES_EMITTED &&
                              !is_bool_query(query)) {
         if (size_flags == VK_QUERY_RESULT_64_BIT) {
            if (query->needs_update)
               update_qbo(ctx, query);
            /* internal qbo always writes 64bit value so we can just direct copy */
            zink_copy_buffer(ctx, res, zink_resource(query->curr_qbo->buffers[0]), offset,
                             get_buffer_offset(query),
                             result_size);
         } else
            /* have to do a new copy for 32bit */
            copy_results_to_buffer(ctx, query, res, offset, 1, size_flags);
         return;
      }
   }

   /* TODO: use CS to aggregate results */

   /* unfortunately, there's no way to accumulate results from multiple queries on the gpu without either
    * clobbering all but the last result or writing the results sequentially, so we have to manually write the result
    */
   force_cpu_read(ctx, pquery, result_type, pres, offset);
}
1480 
1481 uint64_t
zink_get_timestamp(struct pipe_screen * pscreen)1482 zink_get_timestamp(struct pipe_screen *pscreen)
1483 {
1484    struct zink_screen *screen = zink_screen(pscreen);
1485    uint64_t timestamp, deviation;
1486    if (screen->info.have_EXT_calibrated_timestamps) {
1487       VkCalibratedTimestampInfoEXT cti = {0};
1488       cti.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT;
1489       cti.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT;
1490       VkResult result = VKSCR(GetCalibratedTimestampsEXT)(screen->dev, 1, &cti, &timestamp, &deviation);
1491       if (result != VK_SUCCESS) {
1492          mesa_loge("ZINK: vkGetCalibratedTimestampsEXT failed (%s)", vk_Result_to_str(result));
1493       }
1494    } else {
1495       zink_screen_lock_context(screen);
1496       struct pipe_context *pctx = &screen->copy_context->base;
1497       struct pipe_query *pquery = pctx->create_query(pctx, PIPE_QUERY_TIMESTAMP, 0);
1498       if (!pquery)
1499          return 0;
1500       union pipe_query_result result = {0};
1501       pctx->begin_query(pctx, pquery);
1502       pctx->end_query(pctx, pquery);
1503       pctx->get_query_result(pctx, pquery, true, &result);
1504       pctx->destroy_query(pctx, pquery);
1505       zink_screen_unlock_context(screen);
1506       timestamp = result.u64;
1507    }
1508    timestamp_to_nanoseconds(screen, &timestamp);
1509    return timestamp;
1510 }
1511 
1512 void
zink_context_query_init(struct pipe_context * pctx)1513 zink_context_query_init(struct pipe_context *pctx)
1514 {
1515    struct zink_context *ctx = zink_context(pctx);
1516    list_inithead(&ctx->suspended_queries);
1517    list_inithead(&ctx->primitives_generated_queries);
1518 
1519    pctx->create_query = zink_create_query;
1520    pctx->destroy_query = zink_destroy_query;
1521    pctx->begin_query = zink_begin_query;
1522    pctx->end_query = zink_end_query;
1523    pctx->get_query_result = zink_get_query_result;
1524    pctx->get_query_result_resource = zink_get_query_result_resource;
1525    pctx->set_active_query_state = zink_set_active_query_state;
1526    pctx->render_condition = zink_render_condition;
1527 }
1528 
1529 int
zink_get_driver_query_group_info(struct pipe_screen * pscreen,unsigned index,struct pipe_driver_query_group_info * info)1530 zink_get_driver_query_group_info(struct pipe_screen *pscreen, unsigned index,
1531                                  struct pipe_driver_query_group_info *info)
1532 {
1533    if (!info)
1534       return 1;
1535 
1536    assert(index == 0);
1537    info->name = "Zink counters";
1538    info->max_active_queries = ARRAY_SIZE(zink_specific_queries);
1539    info->num_queries = ARRAY_SIZE(zink_specific_queries);
1540 
1541    return 1;
1542 }
1543 
1544 int
zink_get_driver_query_info(struct pipe_screen * pscreen,unsigned index,struct pipe_driver_query_info * info)1545 zink_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
1546                            struct pipe_driver_query_info *info)
1547 {
1548    if (!info)
1549       return ARRAY_SIZE(zink_specific_queries);
1550 
1551    assert(index < ARRAY_SIZE(zink_specific_queries));
1552    *info = zink_specific_queries[index];
1553 
1554    return 1;
1555 }
1556