#include "zink_query.h"

#include "zink_context.h"
#include "zink_clear.h"
#include "zink_program.h"
#include "zink_resource.h"
#include "zink_screen.h"

#include "util/u_dump.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"

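/* number of queries allocated per VkQueryPool; when a pool's last_range hits
 * this limit, query_pool_get_range() drops it from the context and allocates
 * a fresh one
 */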
#define NUM_QUERIES 500

#define ZINK_QUERY_RENDER_PASSES (PIPE_QUERY_DRIVER_SPECIFIC + 0)

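/* a refcounted pool of vulkan queries sharing one VkQueryType (and, for
 * pipeline statistics, one statistics flag set); query ids are handed out
 * sequentially from last_range
 */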
struct zink_query_pool {
   struct list_head list;
   VkQueryType vk_query_type;
   VkQueryPipelineStatisticFlags pipeline_stats;
   VkQueryPool query_pool;
   unsigned last_range;
   unsigned refcount;
};

struct zink_query_buffer {
   struct list_head list;
   unsigned num_results;
   struct pipe_resource *buffers[PIPE_MAX_VERTEX_STREAMS];
};

struct zink_vk_query {
   struct zink_query_pool *pool;
   unsigned query_id;
   bool needs_reset;
   bool started;
   uint32_t refcount;
};

struct zink_query_start {
   union {
      struct {
         bool have_gs;
         bool have_xfb;
         bool was_line_loop;
      };
      uint32_t data;
   };
   struct zink_vk_query *vkq[PIPE_MAX_VERTEX_STREAMS];
};

struct zink_query {
   struct threaded_query base;
   enum pipe_query_type type;

   /* Every time the gallium query needs
    * another vulkan query, add a new start.
    */
   struct util_dynarray starts;
   unsigned start_offset;

   VkQueryType vkqtype;
   unsigned index;
   bool precise;

   bool active; /* query is considered active by vk */
   bool needs_reset; /* query is considered active by vk and cannot be destroyed */
   bool dead; /* query should be destroyed when its fence finishes */
   bool needs_update; /* query needs to update its qbos */
   bool needs_rast_discard_workaround; /* query needs discard disabled */
   bool suspended;
   bool started_in_rp; //needs to be stopped in rp

   struct list_head active_list;

   struct list_head stats_list; /* when active, statistics queries are added to ctx->primitives_generated_queries */
   bool has_draws; /* have_gs and have_xfb are valid for idx=curr_query */

   struct zink_batch_usage *batch_uses; //batch that the query was started in

   struct list_head buffers;
   union {
      struct zink_query_buffer *curr_qbo;
      struct pipe_fence_handle *fence; //PIPE_QUERY_GPU_FINISHED
   };

   struct zink_resource *predicate;
   bool predicate_dirty;
};

static const struct pipe_driver_query_info zink_specific_queries[] = {
   {"render-passes", ZINK_QUERY_RENDER_PASSES, { 0 }},
};

static inline int
get_num_starts(struct zink_query *q)
{
   return util_dynarray_num_elements(&q->starts, struct zink_query_start);
}

static void
update_query_id(struct zink_context *ctx, struct zink_query *q);


static VkQueryPipelineStatisticFlags
pipeline_statistic_convert(enum pipe_statistics_query_index idx)
{
   unsigned map[] = {
      [PIPE_STAT_QUERY_IA_VERTICES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT,
      [PIPE_STAT_QUERY_IA_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT,
      [PIPE_STAT_QUERY_VS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_GS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_GS_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT,
      [PIPE_STAT_QUERY_C_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_C_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT,
      [PIPE_STAT_QUERY_PS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_HS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT,
      [PIPE_STAT_QUERY_DS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_CS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT
   };
   assert(idx < ARRAY_SIZE(map));
   return map[idx];
}

static void
begin_vk_query_indexed(struct zink_context *ctx, struct zink_vk_query *vkq, int index,
                       VkQueryControlFlags flags)
{
   struct zink_batch *batch = &ctx->batch;
   if (!vkq->started) {
      VKCTX(CmdBeginQueryIndexedEXT)(batch->state->cmdbuf,
                                     vkq->pool->query_pool,
                                     vkq->query_id,
                                     flags,
                                     index);
      vkq->started = true;
   }
}

static void
end_vk_query_indexed(struct zink_context *ctx, struct zink_vk_query *vkq, int index)
{
   struct zink_batch *batch = &ctx->batch;
   if (vkq->started) {
      VKCTX(CmdEndQueryIndexedEXT)(batch->state->cmdbuf,
                                   vkq->pool->query_pool,
                                   vkq->query_id, index);
      vkq->started = false;
   }
}

static void
reset_vk_query_pool(struct zink_context *ctx, struct zink_vk_query *vkq)
{
   struct zink_batch *batch = &ctx->batch;
   if (vkq->needs_reset) {
      VKCTX(CmdResetQueryPool)(batch->state->reordered_cmdbuf, vkq->pool->query_pool, vkq->query_id, 1);
      batch->state->has_barriers = true;
   }
   vkq->needs_reset = false;
}

void
zink_context_destroy_query_pools(struct zink_context *ctx)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   list_for_each_entry_safe(struct zink_query_pool, pool, &ctx->query_pools, list) {
      VKSCR(DestroyQueryPool)(screen->dev, pool->query_pool, NULL);
      list_del(&pool->list);
      FREE(pool);
   }
}

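/* return an existing pool matching the vulkan query type (and statistics
 * flags) that this gallium query needs for pool slot 'idx', or create one;
 * emulated PRIMITIVES_GENERATED uses idx==1 for its xfb pool
 */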
static struct zink_query_pool *
find_or_allocate_qp(struct zink_context *ctx, struct zink_query *q, unsigned idx)
{
   VkQueryPipelineStatisticFlags pipeline_stats = 0;
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      pipeline_stats = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
                       VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT;
   else if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE)
      pipeline_stats = pipeline_statistic_convert(q->index);

   VkQueryType vk_query_type = q->vkqtype;
   /* if xfb is active, we need to use an xfb query, otherwise we need pipeline statistics */
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && idx == 1) {
      vk_query_type = VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
      pipeline_stats = 0;
   }

   struct zink_screen *screen = zink_screen(ctx->base.screen);
   list_for_each_entry(struct zink_query_pool, pool, &ctx->query_pools, list) {
      if (pool->vk_query_type == vk_query_type) {
         if (vk_query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
            if (pool->pipeline_stats == pipeline_stats)
               return pool;
         } else
            return pool;
      }
   }

   struct zink_query_pool *new_pool = CALLOC_STRUCT(zink_query_pool);
   if (!new_pool)
      return NULL;

   new_pool->vk_query_type = vk_query_type;
   new_pool->pipeline_stats = pipeline_stats;

   VkQueryPoolCreateInfo pool_create = {0};
   pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
   pool_create.queryType = vk_query_type;
   pool_create.queryCount = NUM_QUERIES;
   pool_create.pipelineStatistics = pipeline_stats;

   VkResult status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &new_pool->query_pool);
   if (status != VK_SUCCESS) {
      mesa_loge("ZINK: vkCreateQueryPool failed (%s)", vk_Result_to_str(status));
      FREE(new_pool);
      return NULL;
   }

   list_addtail(&new_pool->list, &ctx->query_pools);
   return new_pool;
}

static void
update_qbo(struct zink_context *ctx, struct zink_query *q);
static void
reset_qbos(struct zink_context *ctx, struct zink_query *q);


static bool
is_emulated_primgen(const struct zink_query *q)
{
   return q->type == PIPE_QUERY_PRIMITIVES_GENERATED &&
          q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT;
}

static inline unsigned
get_num_query_pools(struct zink_query *q)
{
   if (is_emulated_primgen(q))
      return 2;
   return 1;
}

static inline unsigned
get_num_queries(struct zink_query *q)
{
   if (is_emulated_primgen(q))
      return 2;
   if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      return PIPE_MAX_VERTEX_STREAMS;
   return 1;
}

static inline unsigned
get_num_results(struct zink_query *q)
{
   if (q->type < PIPE_QUERY_DRIVER_SPECIFIC &&
       q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      return 1;
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      return 1;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      return 2;
   default:
      debug_printf("unknown query: %s\n",
                   util_str_query_type(q->type, true));
      unreachable("zink: unknown query type");
   }
}

static void
timestamp_to_nanoseconds(struct zink_screen *screen, uint64_t *timestamp)
{
   /* The number of valid bits in a timestamp value is determined by
    * the VkQueueFamilyProperties::timestampValidBits property of the queue on which the timestamp is written.
    * - 17.5. Timestamp Queries
    */
   if (screen->timestamp_valid_bits < 64)
      *timestamp &= (1ull << screen->timestamp_valid_bits) - 1;

   /* The number of nanoseconds it takes for a timestamp value to be incremented by 1
    * can be obtained from VkPhysicalDeviceLimits::timestampPeriod
    * - 17.5. Timestamp Queries
    */
   *timestamp *= (double)screen->info.props.limits.timestampPeriod;
}

static VkQueryType
convert_query_type(struct zink_screen *screen, enum pipe_query_type query_type, bool *precise)
{
   *precise = false;
   switch (query_type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      *precise = true;
      FALLTHROUGH;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      return VK_QUERY_TYPE_OCCLUSION;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
      return VK_QUERY_TYPE_TIMESTAMP;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      return screen->info.have_EXT_primitives_generated_query ?
             VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT :
             VK_QUERY_TYPE_PIPELINE_STATISTICS;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      return VK_QUERY_TYPE_PIPELINE_STATISTICS;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      return VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
   default:
      debug_printf("unknown query: %s\n",
                   util_str_query_type(query_type, true));
      unreachable("zink: unknown query type");
   }
}

static bool
needs_stats_list(struct zink_query *query)
{
   return is_emulated_primgen(query) ||
          query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
          query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
}

static bool
is_time_query(struct zink_query *query)
{
   return query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIME_ELAPSED;
}

static bool
is_so_overflow_query(struct zink_query *query)
{
   return query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
}

static bool
is_bool_query(struct zink_query *query)
{
   return is_so_overflow_query(query) ||
          query->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
          query->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE ||
          query->type == PIPE_QUERY_GPU_FINISHED;
}

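/* append a new query buffer (one pipe_resource per vulkan query) sized for
 * the maximum number of results this query can accumulate
 */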
static bool
qbo_append(struct pipe_screen *screen, struct zink_query *query)
{
   if (query->curr_qbo && query->curr_qbo->list.next)
      return true;
   struct zink_query_buffer *qbo = CALLOC_STRUCT(zink_query_buffer);
   if (!qbo)
      return false;
   int num_buffers = get_num_queries(query);

   for (unsigned i = 0; i < num_buffers; i++) {
      qbo->buffers[i] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
                                           PIPE_USAGE_STAGING,
                                           /* this is the maximum possible size of the results in a given buffer */
                                           (query->type == PIPE_QUERY_TIMESTAMP ? 1 : NUM_QUERIES) * get_num_results(query) * sizeof(uint64_t));
      if (!qbo->buffers[i])
         goto fail;
   }
   list_addtail(&qbo->list, &query->buffers);

   return true;
fail:
   for (unsigned i = 0; i < num_buffers; i++)
      pipe_resource_reference(&qbo->buffers[i], NULL);
   FREE(qbo);
   return false;
}

static void
unref_vk_pool(struct zink_context *ctx, struct zink_query_pool *pool)
{
   if (!pool || --pool->refcount)
      return;
   util_dynarray_append(&ctx->batch.state->dead_querypools, VkQueryPool, pool->query_pool);
   if (list_is_linked(&pool->list))
      list_del(&pool->list);
   FREE(pool);
}

static void
unref_vk_query(struct zink_context *ctx, struct zink_vk_query *vkq)
{
   if (!vkq)
      return;
   unref_vk_pool(ctx, vkq->pool);
   vkq->refcount--;
   if (vkq->refcount == 0)
      FREE(vkq);
}

static void
destroy_query(struct zink_context *ctx, struct zink_query *query)
{
   ASSERTED struct zink_screen *screen = zink_screen(ctx->base.screen);
   assert(zink_screen_usage_check_completion(screen, query->batch_uses));
   struct zink_query_buffer *qbo, *next;

   struct zink_query_start *starts = query->starts.data;
   unsigned num_starts = query->starts.capacity / sizeof(struct zink_query_start);
   for (unsigned j = 0; j < num_starts; j++) {
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         unref_vk_query(ctx, starts[j].vkq[i]);
      }
   }

   util_dynarray_fini(&query->starts);
   LIST_FOR_EACH_ENTRY_SAFE(qbo, next, &query->buffers, list) {
      for (unsigned i = 0; i < ARRAY_SIZE(qbo->buffers); i++)
         pipe_resource_reference(&qbo->buffers[i], NULL);
      FREE(qbo);
   }
   pipe_resource_reference((struct pipe_resource**)&query->predicate, NULL);
   FREE(query);
}

static void
reset_qbo(struct zink_query *q)
{
   q->curr_qbo = list_first_entry(&q->buffers, struct zink_query_buffer, list);
   q->curr_qbo->num_results = 0;
}

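/* set up the vulkan queries for the next 'start': reuse the currently-active
 * xfb query where required, otherwise carve a new query id out of a
 * (possibly new) pool
 */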
static void
query_pool_get_range(struct zink_context *ctx, struct zink_query *q)
{
   bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP;
   struct zink_query_start *start;
   int num_queries = get_num_queries(q);
   if (!is_timestamp || get_num_starts(q) == 0) {
      size_t size = q->starts.capacity;
      start = util_dynarray_grow(&q->starts, struct zink_query_start, 1);
      if (size != q->starts.capacity) {
         /* when resizing, always zero the new data to avoid garbage */
         uint8_t *data = q->starts.data;
         memset(data + size, 0, q->starts.capacity - size);
      }
   } else {
      start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
   }
   start->data = 0;

   unsigned num_pools = get_num_query_pools(q);
   for (unsigned i = 0; i < num_queries; i++) {
      int pool_idx = num_pools > 1 ? i : 0;
      /* try and find the active query for this */
      struct zink_vk_query *vkq;
      int xfb_idx = num_queries == 4 ? i : q->index;
      if ((q->vkqtype == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT ||
           (pool_idx == 1)) && ctx->curr_xfb_queries[xfb_idx]) {
         vkq = ctx->curr_xfb_queries[xfb_idx];
         vkq->refcount++;
         vkq->pool->refcount++;
      } else {
         struct zink_query_pool *pool = find_or_allocate_qp(ctx, q, pool_idx);
         if (pool->last_range == NUM_QUERIES) {
            list_del(&pool->list);
            pool = find_or_allocate_qp(ctx, q, pool_idx);
         }
         vkq = CALLOC_STRUCT(zink_vk_query);
         if (!vkq) {
            mesa_loge("ZINK: failed to allocate vkq!");
            return;
         }

         pool->refcount++;
         vkq->refcount = 1;
         vkq->needs_reset = true;
         vkq->pool = pool;
         vkq->started = false;
         vkq->query_id = pool->last_range++;
      }
      unref_vk_query(ctx, start->vkq[i]);
      start->vkq[i] = vkq;
   }
}

static struct pipe_query *
zink_create_query(struct pipe_context *pctx,
                  unsigned query_type, unsigned index)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = CALLOC_STRUCT(zink_query);

   if (!query)
      return NULL;
   list_inithead(&query->buffers);

   query->index = index;
   query->type = query_type;

   if (query->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return (struct pipe_query *)query;

   if (query->type == PIPE_QUERY_GPU_FINISHED || query->type == PIPE_QUERY_TIMESTAMP_DISJOINT)
      return (struct pipe_query *)query;
   query->vkqtype = convert_query_type(screen, query_type, &query->precise);
   if (query->vkqtype == -1)
      return NULL;

   util_dynarray_init(&query->starts, NULL);

   assert(!query->precise || query->vkqtype == VK_QUERY_TYPE_OCCLUSION);

   /* use emulated path for drivers without full support */
   if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT && index &&
       !screen->info.primgen_feats.primitivesGeneratedQueryWithNonZeroStreams)
      query->vkqtype = VK_QUERY_TYPE_PIPELINE_STATISTICS;

   if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      query->needs_rast_discard_workaround = !screen->info.primgen_feats.primitivesGeneratedQueryWithRasterizerDiscard;
   } else if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED) {
      query->needs_rast_discard_workaround = true;
   }

   if (!qbo_append(pctx->screen, query))
      goto fail;
   struct zink_batch *batch = &zink_context(pctx)->batch;
   batch->has_work = true;
   query->needs_reset = true;
   query->predicate_dirty = true;
   if (query->type == PIPE_QUERY_TIMESTAMP) {
      query->active = true;
      /* defer pool reset until end_query since we're guaranteed to be threadsafe then */
      reset_qbo(query);
   }
   return (struct pipe_query *)query;
fail:
   destroy_query(zink_context(pctx), query);
   return NULL;
}

static void
zink_destroy_query(struct pipe_context *pctx,
                   struct pipe_query *q)
{
   struct zink_query *query = (struct zink_query *)q;

   /* only destroy if this query isn't active on any batches,
    * otherwise just mark dead and wait
    */
   if (query->batch_uses) {
      query->dead = true;
      return;
   }

   destroy_query(zink_context(pctx), query);
}

void
zink_prune_query(struct zink_batch_state *bs, struct zink_query *query)
{
   if (!zink_batch_usage_matches(query->batch_uses, bs))
      return;
   query->batch_uses = NULL;
   if (query->dead)
      destroy_query(bs->ctx, query);
}

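/* accumulate the raw results from every start into the gallium result union
 * using the semantics of each query type: booleans are OR'd, counters are
 * summed, timestamps are diffed, and xfb overflow compares written vs needed
 * primitives
 */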
static void
check_query_results(struct zink_query *query, union pipe_query_result *result,
                    int num_starts, uint64_t *results, uint64_t *xfb_results)
{
   uint64_t last_val = 0;
   int result_size = get_num_results(query);
   int idx = 0;
   util_dynarray_foreach(&query->starts, struct zink_query_start, start) {
      unsigned i = idx * result_size;
      idx++;
      switch (query->type) {
      case PIPE_QUERY_OCCLUSION_PREDICATE:
      case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      case PIPE_QUERY_GPU_FINISHED:
         result->b |= results[i] != 0;
         break;

      case PIPE_QUERY_TIME_ELAPSED:
      case PIPE_QUERY_TIMESTAMP:
         /* the application can sum the differences between all N queries to determine the total execution time.
          * - 17.5. Timestamp Queries
          */
         if (query->type != PIPE_QUERY_TIME_ELAPSED || i)
            result->u64 += results[i] - last_val;
         last_val = results[i];
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
         result->u64 += results[i];
         break;
      case PIPE_QUERY_PRIMITIVES_GENERATED:
         if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
            result->u64 += results[i];
         else if (start->have_xfb || query->index)
            result->u64 += xfb_results[i + 1];
         else
            /* if a given draw had a geometry shader, we need to use the first result */
            result->u64 += results[i + !start->have_gs];
         break;
      case PIPE_QUERY_PRIMITIVES_EMITTED:
         /* A query pool created with this type will capture 2 integers -
          * numPrimitivesWritten and numPrimitivesNeeded -
          * for the specified vertex stream output from the last vertex processing stage.
          * - from VK_EXT_transform_feedback spec
          */
         result->u64 += results[i];
         break;
      case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         /* A query pool created with this type will capture 2 integers -
          * numPrimitivesWritten and numPrimitivesNeeded -
          * for the specified vertex stream output from the last vertex processing stage.
          * - from VK_EXT_transform_feedback spec
          */
         if (start->have_xfb)
            result->b |= results[i] != results[i + 1];
         break;
      case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
         switch (query->index) {
         case PIPE_STAT_QUERY_IA_VERTICES:
            result->u64 += start->was_line_loop ? results[i] / 2 : results[i];
            break;
         default:
            result->u64 += results[i];
            break;
         }
         break;

      default:
         debug_printf("unhandled query type: %s\n",
                      util_str_query_type(query->type, true));
         unreachable("unexpected query type");
      }
   }
}

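/* map each query buffer and reduce its contents into 'result'; returns false
 * if a result isn't available yet (i.e., a non-blocking map would stall)
 */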
static bool
get_query_result(struct pipe_context *pctx,
                 struct pipe_query *q,
                 bool wait,
                 union pipe_query_result *result)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query *)q;
   unsigned flags = PIPE_MAP_READ;

   if (!wait)
      flags |= PIPE_MAP_DONTBLOCK;
   if (query->base.flushed)
      /* this is not a context-safe operation; ensure map doesn't use slab alloc */
      flags |= PIPE_MAP_THREAD_SAFE;

   util_query_clear_result(result, query->type);

   int num_starts = get_num_starts(query);
   /* no results: return zero */
   if (!num_starts)
      return true;
   int result_size = get_num_results(query) * sizeof(uint64_t);
   int num_maps = get_num_queries(query);

   struct zink_query_buffer *qbo;
   struct pipe_transfer *xfer[PIPE_MAX_VERTEX_STREAMS] = { 0 };
   LIST_FOR_EACH_ENTRY(qbo, &query->buffers, list) {
      uint64_t *results[PIPE_MAX_VERTEX_STREAMS] = { NULL, NULL };
      bool is_timestamp = query->type == PIPE_QUERY_TIMESTAMP;
      if (!qbo->num_results)
         continue;

      for (unsigned i = 0; i < num_maps; i++) {
         results[i] = pipe_buffer_map_range(pctx, qbo->buffers[i], 0,
                                            (is_timestamp ? 1 : qbo->num_results) * result_size, flags, &xfer[i]);
         if (!results[i]) {
            if (wait)
               debug_printf("zink: qbo read failed!");
            goto fail;
         }
      }
      if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
         for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS && !result->b; i++) {
            check_query_results(query, result, num_starts, results[i], NULL);
         }
      } else
         check_query_results(query, result, num_starts, results[0], results[1]);

      for (unsigned i = 0 ; i < num_maps; i++)
         pipe_buffer_unmap(pctx, xfer[i]);

      /* if overflow is detected we can stop */
      if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE && result->b)
         break;
   }

   if (is_time_query(query))
      timestamp_to_nanoseconds(screen, &result->u64);

   return true;
fail:
   for (unsigned i = 0 ; i < num_maps; i++)
      if (xfer[i])
         pipe_buffer_unmap(pctx, xfer[i]);
   return false;
}

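/* read a query result back on the CPU (waiting for it to become available)
 * and write it to the target buffer through tc; used for results that can't
 * be produced with vkCmdCopyQueryPoolResults
 */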
static void
force_cpu_read(struct zink_context *ctx, struct pipe_query *pquery, enum pipe_query_value_type result_type, struct pipe_resource *pres, unsigned offset)
{
   struct pipe_context *pctx = &ctx->base;
   unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
   struct zink_query *query = (struct zink_query*)pquery;
   union pipe_query_result result = {0};

   if (query->needs_update)
      update_qbo(ctx, query);

   bool success = get_query_result(pctx, pquery, true, &result);
   if (!success) {
      debug_printf("zink: getting query result failed\n");
      return;
   }

   if (result_type <= PIPE_QUERY_TYPE_U32) {
      uint32_t u32;
      uint32_t limit;
      if (result_type == PIPE_QUERY_TYPE_I32)
         limit = INT_MAX;
      else
         limit = UINT_MAX;
      if (is_bool_query(query))
         u32 = result.b;
      else
         u32 = MIN2(limit, result.u64);
      tc_buffer_write(pctx, pres, offset, result_size, &u32);
   } else {
      uint64_t u64;
      if (is_bool_query(query))
         u64 = result.b;
      else
         u64 = result.u64;
      tc_buffer_write(pctx, pres, offset, result_size, &u64);
   }
}

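/* record a vkCmdCopyQueryPoolResults into the current batch, terminating the
 * renderpass and setting up usage/barrier state for the destination buffer
 */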
static void
copy_pool_results_to_buffer(struct zink_context *ctx, struct zink_query *query, VkQueryPool pool,
                            unsigned query_id, struct zink_resource *res, unsigned offset,
                            int num_results, VkQueryResultFlags flags)
{
   struct zink_batch *batch = &ctx->batch;
   unsigned type_size = (flags & VK_QUERY_RESULT_64_BIT) ? sizeof(uint64_t) : sizeof(uint32_t);
   unsigned base_result_size = get_num_results(query) * type_size;
   unsigned result_size = base_result_size * num_results;
   if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
      result_size += type_size;

   bool marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "update_qbo(%s: id=%u, num_results=%d)", vk_QueryType_to_str(query->vkqtype), query_id, num_results);

   zink_batch_no_rp(ctx);
   /* if it's a single query that doesn't need special handling, we can copy it and be done */
   zink_batch_reference_resource_rw(batch, res, true);
   res->obj->access = VK_ACCESS_TRANSFER_WRITE_BIT;
   res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
   util_range_add(&res->base.b, &res->valid_buffer_range, offset, offset + result_size);
   assert(query_id < NUM_QUERIES);
   res->obj->unordered_read = res->obj->unordered_write = false;
   VKCTX(CmdCopyQueryPoolResults)(batch->state->cmdbuf, pool, query_id, num_results, res->obj->buffer,
                                  offset, base_result_size, flags);
   zink_cmd_debug_marker_end(ctx, batch->state->cmdbuf, marker);
}

static void
copy_results_to_buffer(struct zink_context *ctx, struct zink_query *query, struct zink_resource *res, unsigned offset, int num_results, VkQueryResultFlags flags)
{
   struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
   copy_pool_results_to_buffer(ctx, query, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id, res, offset, num_results, flags);
}


static void
reset_query_range(struct zink_context *ctx, struct zink_query *q)
{
   int num_queries = get_num_queries(q);
   struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
   for (unsigned i = 0; i < num_queries; i++) {
      reset_vk_query_pool(ctx, start->vkq[i]);
   }
}

static void
reset_qbos(struct zink_context *ctx, struct zink_query *q)
{
   if (q->needs_update)
      update_qbo(ctx, q);

   q->needs_reset = false;
   /* create new qbo for non-timestamp queries:
    * timestamp queries should never need more than 2 entries in the qbo
    */
   if (q->type == PIPE_QUERY_TIMESTAMP)
      return;
   if (qbo_append(ctx->base.screen, q))
      reset_qbo(q);
   else
      debug_printf("zink: qbo alloc failed on reset!");
}

static inline unsigned
get_buffer_offset(struct zink_query *q)
{
   return (get_num_starts(q) - 1) * get_num_results(q) * sizeof(uint64_t);
}

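/* flush the pending results of all starts into the qbo, merging copies of
 * consecutive query ids from the same pool into a single
 * vkCmdCopyQueryPoolResults call
 */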
static void
update_qbo(struct zink_context *ctx, struct zink_query *q)
{
   struct zink_query_buffer *qbo = q->curr_qbo;
   unsigned num_starts = get_num_starts(q);
   struct zink_query_start *starts = q->starts.data;
   bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP;
   /* timestamp queries just write to offset 0 always */
   int num_queries = get_num_queries(q);
   unsigned num_results = qbo->num_results;
   for (unsigned i = 0; i < num_queries; i++) {
      unsigned start_offset = q->start_offset;
      while (start_offset < num_starts) {
         unsigned num_merged_copies = 0;
         VkQueryPool qp = starts[start_offset].vkq[i]->pool->query_pool;
         unsigned base_id = starts[start_offset].vkq[i]->query_id;
         /* iterate over all the starts to see how many can be merged */
         for (unsigned j = start_offset; j < num_starts; j++, num_merged_copies++) {
            if (starts[j].vkq[i]->pool->query_pool != qp || starts[j].vkq[i]->query_id != base_id + num_merged_copies)
               break;
         }
         assert(num_merged_copies);
         unsigned cur_offset = start_offset * get_num_results(q) * sizeof(uint64_t);
         unsigned offset = is_timestamp ? 0 : cur_offset;
         copy_pool_results_to_buffer(ctx, q, starts[start_offset].vkq[i]->pool->query_pool, starts[start_offset].vkq[i]->query_id,
                                     zink_resource(qbo->buffers[i]),
                                     offset,
                                     num_merged_copies,
                                     /* There is an implicit execution dependency from
                                      * each such query command to all query commands previously submitted to the same queue. There
                                      * is one significant exception to this; if the flags parameter of vkCmdCopyQueryPoolResults does not
                                      * include VK_QUERY_RESULT_WAIT_BIT, execution of vkCmdCopyQueryPoolResults may happen-before
                                      * the results of vkCmdEndQuery are available.
                                      *
                                      * - Chapter 18. Queries
                                      */
                                     VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
         if (!is_timestamp)
            q->curr_qbo->num_results += num_merged_copies;
         start_offset += num_merged_copies;
      }
   }
   q->start_offset += q->curr_qbo->num_results - num_results;


   if (is_timestamp)
      q->curr_qbo->num_results = 1;

   q->needs_update = false;
}

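/* emit the commands to start a query on the given batch: handles the indexed
 * (xfb/primgen) paths, the TIME_ELAPSED timestamp write, and the
 * rasterizer-discard workaround
 */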
static void
begin_query(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q)
{
   VkQueryControlFlags flags = 0;

   if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT || q->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && q->index == PIPE_STAT_QUERY_CS_INVOCATIONS && ctx->batch.in_rp) {
      /* refuse to start CS queries in renderpasses */
      if (!list_is_linked(&q->active_list))
         list_addtail(&q->active_list, &ctx->suspended_queries);
      q->suspended = true;
      return;
   }

   zink_flush_dgc_if_enabled(ctx);

   update_query_id(ctx, q);
   q->predicate_dirty = true;
   if (q->needs_reset)
      reset_qbos(ctx, q);
   reset_query_range(ctx, q);
   q->active = true;
   batch->has_work = true;

   struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
   if (q->type == PIPE_QUERY_TIME_ELAPSED) {
      VKCTX(CmdWriteTimestamp)(batch->state->cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);
      if (!batch->in_rp)
         update_qbo(ctx, q);
      zink_batch_usage_set(&q->batch_uses, batch->state);
      _mesa_set_add(&batch->state->active_queries, q);
   }
   /* ignore the rest of begin_query for timestamps */
   if (is_time_query(q))
      return;

   /* A query must either begin and end inside the same subpass of a render pass
    * instance, or must both begin and end outside of a render pass instance
    * (i.e. contain entire render pass instances).
    * - 18.2. Query Operation
    */
   q->started_in_rp = ctx->batch.in_rp;

   if (q->precise)
      flags |= VK_QUERY_CONTROL_PRECISE_BIT;

   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
       is_emulated_primgen(q) ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      struct zink_vk_query *vkq = start->vkq[1] ? start->vkq[1] : start->vkq[0];
      assert(!ctx->curr_xfb_queries[q->index] || ctx->curr_xfb_queries[q->index] == vkq);
      ctx->curr_xfb_queries[q->index] = vkq;

      begin_vk_query_indexed(ctx, vkq, q->index, flags);
   } else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         assert(!ctx->curr_xfb_queries[i] || ctx->curr_xfb_queries[i] == start->vkq[i]);
         ctx->curr_xfb_queries[i] = start->vkq[i];

         begin_vk_query_indexed(ctx, start->vkq[i], i, flags);
      }
   } else if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      begin_vk_query_indexed(ctx, start->vkq[0], q->index, flags);
   }
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT && q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      VKCTX(CmdBeginQuery)(batch->state->cmdbuf, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id, flags);
   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && q->index == PIPE_STAT_QUERY_IA_VERTICES) {
      assert(!ctx->vertices_query);
      ctx->vertices_query = q;
   }
   if (needs_stats_list(q))
      list_addtail(&q->stats_list, &ctx->primitives_generated_queries);
   zink_batch_usage_set(&q->batch_uses, batch->state);
   _mesa_set_add(&batch->state->active_queries, q);
   if (q->needs_rast_discard_workaround) {
      ctx->primitives_generated_active = true;
      if (zink_set_rasterizer_discard(ctx, true))
         zink_set_null_fs(ctx);
   }
}

static bool
zink_begin_query(struct pipe_context *pctx,
                 struct pipe_query *q)
{
   struct zink_query *query = (struct zink_query *)q;
   struct zink_context *ctx = zink_context(pctx);
   struct zink_batch *batch = &ctx->batch;

   /* drop all past results */
   reset_qbo(query);

   if (query->type < PIPE_QUERY_DRIVER_SPECIFIC && query->vkqtype == VK_QUERY_TYPE_OCCLUSION)
      ctx->occlusion_query_active = true;
   if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      ctx->fs_query_active = true;

   query->predicate_dirty = true;

   util_dynarray_clear(&query->starts);
   query->start_offset = 0;

   if (batch->in_rp) {
      begin_query(ctx, batch, query);
   } else {
      /* never directly start queries out of renderpass, always defer */
      list_addtail(&query->active_list, &ctx->suspended_queries);
      query->suspended = true;
      if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
         ctx->primitives_generated_suspended = query->needs_rast_discard_workaround;
   }

   return true;
}

static void
update_query_id(struct zink_context *ctx, struct zink_query *q)
{
   query_pool_get_range(ctx, q);
   ctx->batch.has_work = true;
   q->has_draws = false;
}

static void
end_query(struct zink_context *ctx, struct zink_batch *batch, struct zink_query *q)
{
   if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT || q->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return;

   zink_flush_dgc_if_enabled(ctx);

   ASSERTED struct zink_query_buffer *qbo = q->curr_qbo;
   assert(qbo);
   assert(!is_time_query(q));
   q->active = false;
   assert(q->started_in_rp == batch->in_rp);
   struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);

   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
       is_emulated_primgen(q) ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      struct zink_vk_query *vkq = start->vkq[1] ? start->vkq[1] : start->vkq[0];

      end_vk_query_indexed(ctx, vkq, q->index);
      ctx->curr_xfb_queries[q->index] = NULL;
   }
   else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         end_vk_query_indexed(ctx, start->vkq[i], i);
         ctx->curr_xfb_queries[i] = NULL;
      }
   } else if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      end_vk_query_indexed(ctx, start->vkq[0], q->index);
   }
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT &&
       q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT && !is_time_query(q))
      VKCTX(CmdEndQuery)(batch->state->cmdbuf, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_IA_VERTICES)
      ctx->vertices_query = NULL;

   if (needs_stats_list(q))
      list_delinit(&q->stats_list);

   q->needs_update = true;
   if (q->needs_rast_discard_workaround) {
      ctx->primitives_generated_active = false;
      if (zink_set_rasterizer_discard(ctx, false))
         zink_set_null_fs(ctx);
   }
}

static bool
zink_end_query(struct pipe_context *pctx,
               struct pipe_query *q)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_query *query = (struct zink_query *)q;
   struct zink_batch *batch = &ctx->batch;

   if (query->type == PIPE_QUERY_TIMESTAMP_DISJOINT || query->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return true;

   if (query->type == PIPE_QUERY_GPU_FINISHED) {
      pctx->flush(pctx, &query->fence, PIPE_FLUSH_DEFERRED);
      return true;
   }

   /* FIXME: this can be called from a thread, but it needs to write to the cmdbuf */
   threaded_context_unwrap_sync(pctx);

   if (query->vkqtype == VK_QUERY_TYPE_OCCLUSION)
      ctx->occlusion_query_active = true;
   if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      ctx->fs_query_active = true;

   bool unset_null_fs = query->type == PIPE_QUERY_PRIMITIVES_GENERATED && (ctx->primitives_generated_suspended || ctx->primitives_generated_active);
   if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
      ctx->primitives_generated_suspended = false;

   if (list_is_linked(&query->stats_list))
      list_delinit(&query->stats_list);
   if (query->suspended) {
      list_delinit(&query->active_list);
      query->suspended = false;
   }
   if (is_time_query(query)) {
      update_query_id(ctx, query);
      if (query->needs_reset)
         reset_qbos(ctx, query);
      reset_query_range(ctx, query);
      struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      VKCTX(CmdWriteTimestamp)(batch->state->cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                               start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);
      zink_batch_usage_set(&query->batch_uses, batch->state);
      _mesa_set_add(&batch->state->active_queries, query);
      query->needs_update = true;
   } else if (query->active) {
      /* this should be a tc-optimized query end that doesn't split a renderpass */
      if (!query->started_in_rp)
         zink_batch_no_rp(ctx);
      end_query(ctx, batch, query);
   }

   if (unset_null_fs)
      zink_set_null_fs(ctx);

   return true;
}

static bool
zink_get_query_result(struct pipe_context *pctx,
                      struct pipe_query *q,
                      bool wait,
                      union pipe_query_result *result)
{
   struct zink_query *query = (void*)q;
   struct zink_context *ctx = zink_context(pctx);

   if (query->type == PIPE_QUERY_TIMESTAMP_DISJOINT) {
      result->timestamp_disjoint.frequency = zink_screen(pctx->screen)->info.props.limits.timestampPeriod * 1000000.0;
      result->timestamp_disjoint.disjoint = false;
      return true;
   }

   if (query->type == PIPE_QUERY_GPU_FINISHED) {
      struct pipe_screen *screen = pctx->screen;

      result->b = screen->fence_finish(screen, query->base.flushed ? NULL : pctx,
                                       query->fence, wait ? OS_TIMEOUT_INFINITE : 0);
      return result->b;
   }

   if (query->type == ZINK_QUERY_RENDER_PASSES) {
      result->u64 = ctx->hud.render_passes;
      ctx->hud.render_passes = 0;
      return true;
   }

   if (query->needs_update) {
      assert(!ctx->tc || !threaded_query(q)->flushed);
      update_qbo(ctx, query);
   }

   if (zink_batch_usage_is_unflushed(query->batch_uses)) {
      if (!threaded_query(q)->flushed)
         pctx->flush(pctx, NULL, 0);
      if (!wait)
         return false;
   }

   return get_query_result(pctx, q, wait, result);
}

static void
suspend_query(struct zink_context *ctx, struct zink_query *query)
{
   /* if a query isn't active here then we don't need to reactivate it on the next batch */
   if (query->active && !is_time_query(query))
      end_query(ctx, &ctx->batch, query);
   if (query->needs_update && !ctx->batch.in_rp)
      update_qbo(ctx, query);
}

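/* suspend all queries active on the current batch (or, with rp_only, just the
 * ones started inside a renderpass) and track them on ctx->suspended_queries
 * so they can be resumed later
 */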
static void
suspend_queries(struct zink_context *ctx, bool rp_only)
{
   set_foreach(&ctx->batch.state->active_queries, entry) {
      struct zink_query *query = (void*)entry->key;
      if (query->suspended || (rp_only && !query->started_in_rp))
         continue;
      if (query->active && !is_time_query(query)) {
         /* the fence is going to steal the set off the batch, so we have to copy
          * the active queries onto a list
          */
         list_addtail(&query->active_list, &ctx->suspended_queries);
         query->suspended = true;
         if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
            ctx->primitives_generated_suspended = query->needs_rast_discard_workaround;
      }
      suspend_query(ctx, query);
   }
}

void
zink_suspend_queries(struct zink_context *ctx, struct zink_batch *batch)
{
   suspend_queries(ctx, false);
}

void
zink_resume_queries(struct zink_context *ctx, struct zink_batch *batch)
{
   struct zink_query *query, *next;
   LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
      list_delinit(&query->active_list);
      query->suspended = false;
      if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
         ctx->primitives_generated_suspended = false;
      if (query->needs_update && !ctx->batch.in_rp)
         update_qbo(ctx, query);
      begin_query(ctx, batch, query);
   }
}

void
zink_resume_cs_query(struct zink_context *ctx)
{
   struct zink_query *query, *next;
   LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
      if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_CS_INVOCATIONS) {
         list_delinit(&query->active_list);
         query->suspended = false;
         begin_query(ctx, &ctx->batch, query);
      }
   }
}

void
zink_query_renderpass_suspend(struct zink_context *ctx)
{
   suspend_queries(ctx, true);
}

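/* called around draw updates: if the gs/xfb/line-loop state affecting emulated
 * query results has changed since the last draw, split all affected queries by
 * suspending and resuming them, then record the new state on each current start
 */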
void
zink_query_update_gs_states(struct zink_context *ctx)
{
   struct zink_query *query;
   bool suspendall = false;
   bool have_gs = !!ctx->gfx_stages[MESA_SHADER_GEOMETRY];
   bool have_xfb = !!ctx->num_so_targets;

   LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      assert(query->active);
      if (query->has_draws) {
         if (last_start->have_gs != have_gs ||
             last_start->have_xfb != have_xfb) {
            suspendall = true;
         }
      }
   }

   if (ctx->vertices_query) {
      query = ctx->vertices_query;
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      assert(query->active);
      if (last_start->was_line_loop != ctx->was_line_loop) {
         suspendall = true;
      }
   }
   if (suspendall) {
      zink_suspend_queries(ctx, &ctx->batch);
      zink_resume_queries(ctx, &ctx->batch);
   }

   LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      last_start->have_gs = have_gs;
      last_start->have_xfb = have_xfb;
      query->has_draws = true;
   }
   if (ctx->vertices_query) {
      query = ctx->vertices_query;
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      last_start->was_line_loop = ctx->was_line_loop;
      query->has_draws = true;
   }
}

static void
zink_set_active_query_state(struct pipe_context *pctx, bool enable)
{
   struct zink_context *ctx = zink_context(pctx);
   /* unordered blits already disable queries */
   if (ctx->unordered_blitting)
      return;
   ctx->queries_disabled = !enable;

   struct zink_batch *batch = &ctx->batch;
   if (ctx->queries_disabled)
      zink_suspend_queries(ctx, batch);
   else if (ctx->batch.in_rp)
      zink_resume_queries(ctx, batch);
}

void
zink_query_sync(struct zink_context *ctx, struct zink_query *query)
{
   if (query->needs_update)
      update_qbo(ctx, query);
}

void
zink_start_conditional_render(struct zink_context *ctx)
{
   if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || ctx->render_condition.active)
      return;
   struct zink_batch *batch = &ctx->batch;
   VkConditionalRenderingFlagsEXT begin_flags = 0;
   if (ctx->render_condition.inverted)
      begin_flags = VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
   VkConditionalRenderingBeginInfoEXT begin_info = {0};
   begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
   begin_info.buffer = ctx->render_condition.query->predicate->obj->buffer;
   begin_info.flags = begin_flags;
   ctx->render_condition.query->predicate->obj->unordered_read = false;
   VKCTX(CmdBeginConditionalRenderingEXT)(batch->state->cmdbuf, &begin_info);
   zink_batch_reference_resource_rw(batch, ctx->render_condition.query->predicate, false);
   ctx->render_condition.active = true;
}

void
zink_stop_conditional_render(struct zink_context *ctx)
{
   zink_flush_dgc_if_enabled(ctx);
   struct zink_batch *batch = &ctx->batch;
   zink_clear_apply_conditionals(ctx);
   if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || !ctx->render_condition.active)
      return;
   VKCTX(CmdEndConditionalRenderingEXT)(batch->state->cmdbuf);
   ctx->render_condition.active = false;
}

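/* gallium render-condition hook: lazily allocates the predicate buffer,
 * fills it from the query result (on the gpu when possible, otherwise via
 * force_cpu_read), and arms conditional rendering
 */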
static void
zink_render_condition(struct pipe_context *pctx,
                      struct pipe_query *pquery,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_query *query = (struct zink_query *)pquery;
   zink_batch_no_rp(ctx);
   VkQueryResultFlagBits flags = 0;

   zink_flush_dgc_if_enabled(ctx);
   if (query == NULL) {
      /* force conditional clears if they exist */
      if (ctx->clears_enabled && !ctx->batch.in_rp)
         zink_batch_rp(ctx);
      zink_stop_conditional_render(ctx);
      ctx->render_condition_active = false;
      ctx->render_condition.query = NULL;
      return;
   }

   if (!query->predicate) {
      struct pipe_resource *pres;

      /* need to create a vulkan buffer to copy the data into */
      pres = pipe_buffer_create(pctx->screen, PIPE_BIND_QUERY_BUFFER, PIPE_USAGE_DEFAULT, sizeof(uint64_t));
      if (!pres)
         return;

      query->predicate = zink_resource(pres);
   }
   if (query->predicate_dirty) {
      struct zink_resource *res = query->predicate;

      if (mode == PIPE_RENDER_COND_WAIT || mode == PIPE_RENDER_COND_BY_REGION_WAIT)
         flags |= VK_QUERY_RESULT_WAIT_BIT;

      flags |= VK_QUERY_RESULT_64_BIT;
      int num_results = get_num_starts(query);
      if (num_results) {
         if (!is_emulated_primgen(query) &&
             !is_so_overflow_query(query) &&
             num_results == 1) {
            copy_results_to_buffer(ctx, query, res, 0, num_results, flags);
         } else {
            /* these need special handling */
            force_cpu_read(ctx, pquery, PIPE_QUERY_TYPE_U32, &res->base.b, 0);
         }
      } else {
         uint64_t zero = 0;
         tc_buffer_write(pctx, &res->base.b, 0, sizeof(zero), &zero);
      }
      zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT, VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT);
      query->predicate_dirty = false;
   }
   ctx->render_condition.inverted = condition;
   ctx->render_condition_active = true;
   ctx->render_condition.query = query;
   if (ctx->batch.in_rp)
      zink_start_conditional_render(ctx);
}

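/* write a query result (or, for index==-1, only its availability) into a
 * buffer, preferring gpu copies and falling back to force_cpu_read() when
 * results from multiple queries have to be merged
 */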
static void
zink_get_query_result_resource(struct pipe_context *pctx,
                               struct pipe_query *pquery,
                               enum pipe_query_flags flags,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *pres,
                               unsigned offset)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query*)pquery;
   struct zink_resource *res = zink_resource(pres);
   unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
   VkQueryResultFlagBits size_flags = result_type <= PIPE_QUERY_TYPE_U32 ? 0 : VK_QUERY_RESULT_64_BIT;
   unsigned num_queries = get_num_starts(query);

   /* it's possible that a query may have no data at all: write out zeroes to the buffer and return */
   uint64_t u64[4] = {0};
   unsigned src_offset = result_size * get_num_results(query);
   if (!num_queries) {
      tc_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
      return;
   }

   if (index == -1) {
      /* VK_QUERY_RESULT_WITH_AVAILABILITY_BIT will ALWAYS write some kind of result data
       * in addition to the availability result, which is a problem if we're just trying to get availability data
       *
       * if we know that there's no valid buffer data in the preceding buffer range, then we can just
       * stomp on it with a glorious queued buffer copy instead of forcing a stall to manually write to the
       * buffer
       */

      VkQueryResultFlags flag = is_time_query(query) ? 0 : VK_QUERY_RESULT_PARTIAL_BIT;
      if (zink_batch_usage_check_completion(ctx, query->batch_uses)) {
         struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
         unsigned query_id = start->vkq[0]->query_id;
         VkResult result = VKCTX(GetQueryPoolResults)(screen->dev, start->vkq[0]->pool->query_pool, query_id, 1,
                                                      sizeof(u64), u64, 0, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
         if (result == VK_SUCCESS) {
            tc_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
            return;
         } else {
            mesa_loge("ZINK: vkGetQueryPoolResults failed (%s)", vk_Result_to_str(result));
         }
      }
      struct pipe_resource *staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, src_offset + result_size);
      copy_results_to_buffer(ctx, query, zink_resource(staging), 0, 1, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
      zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size * get_num_results(query), result_size);
      pipe_resource_reference(&staging, NULL);
      return;
   }

   /* There is an implicit execution dependency from
    * each such query command to all query commands previously submitted to the same queue. There
    * is one significant exception to this; if the flags parameter of vkCmdCopyQueryPoolResults does not
    * include VK_QUERY_RESULT_WAIT_BIT, execution of vkCmdCopyQueryPoolResults may happen-before
    * the results of vkCmdEndQuery are available.
    *
    * - Chapter 18. Queries
    */
   size_flags |= VK_QUERY_RESULT_WAIT_BIT;
   if (!is_time_query(query) && !is_bool_query(query)) {
      if (num_queries == 1 && !is_emulated_primgen(query) &&
          query->type != PIPE_QUERY_PRIMITIVES_EMITTED &&
          !is_bool_query(query)) {
         if (size_flags == VK_QUERY_RESULT_64_BIT) {
            if (query->needs_update)
               update_qbo(ctx, query);
            /* internal qbo always writes 64bit value so we can just direct copy */
            zink_copy_buffer(ctx, res, zink_resource(query->curr_qbo->buffers[0]), offset,
                             get_buffer_offset(query),
                             result_size);
         } else
            /* have to do a new copy for 32bit */
            copy_results_to_buffer(ctx, query, res, offset, 1, size_flags);
         return;
      }
   }

   /* TODO: use CS to aggregate results */

   /* unfortunately, there's no way to accumulate results from multiple queries on the gpu without either
    * clobbering all but the last result or writing the results sequentially, so we have to manually write the result
    */
   force_cpu_read(ctx, pquery, result_type, pres, offset);
}

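/* screen-level timestamp: uses VK_EXT_calibrated_timestamps when available,
 * otherwise falls back to a throwaway timestamp query on the copy context
 */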
uint64_t
zink_get_timestamp(struct pipe_screen *pscreen)
{
   struct zink_screen *screen = zink_screen(pscreen);
   uint64_t timestamp, deviation;
   if (screen->info.have_EXT_calibrated_timestamps) {
      VkCalibratedTimestampInfoEXT cti = {0};
      cti.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT;
      cti.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT;
      VkResult result = VKSCR(GetCalibratedTimestampsEXT)(screen->dev, 1, &cti, &timestamp, &deviation);
      if (result != VK_SUCCESS) {
         mesa_loge("ZINK: vkGetCalibratedTimestampsEXT failed (%s)", vk_Result_to_str(result));
      }
   } else {
      zink_screen_lock_context(screen);
      struct pipe_context *pctx = &screen->copy_context->base;
      struct pipe_query *pquery = pctx->create_query(pctx, PIPE_QUERY_TIMESTAMP, 0);
      if (!pquery)
         return 0;
      union pipe_query_result result = {0};
      pctx->begin_query(pctx, pquery);
      pctx->end_query(pctx, pquery);
      pctx->get_query_result(pctx, pquery, true, &result);
      pctx->destroy_query(pctx, pquery);
      zink_screen_unlock_context(screen);
      timestamp = result.u64;
   }
   timestamp_to_nanoseconds(screen, &timestamp);
   return timestamp;
}

void
zink_context_query_init(struct pipe_context *pctx)
{
   struct zink_context *ctx = zink_context(pctx);
   list_inithead(&ctx->suspended_queries);
   list_inithead(&ctx->primitives_generated_queries);

   pctx->create_query = zink_create_query;
   pctx->destroy_query = zink_destroy_query;
   pctx->begin_query = zink_begin_query;
   pctx->end_query = zink_end_query;
   pctx->get_query_result = zink_get_query_result;
   pctx->get_query_result_resource = zink_get_query_result_resource;
   pctx->set_active_query_state = zink_set_active_query_state;
   pctx->render_condition = zink_render_condition;
}

int
zink_get_driver_query_group_info(struct pipe_screen *pscreen, unsigned index,
                                 struct pipe_driver_query_group_info *info)
{
   if (!info)
      return 1;

   assert(index == 0);
   info->name = "Zink counters";
   info->max_active_queries = ARRAY_SIZE(zink_specific_queries);
   info->num_queries = ARRAY_SIZE(zink_specific_queries);

   return 1;
}

int
zink_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
                           struct pipe_driver_query_info *info)
{
   if (!info)
      return ARRAY_SIZE(zink_specific_queries);

   assert(index < ARRAY_SIZE(zink_specific_queries));
   *info = zink_specific_queries[index];

   return 1;
}