#include "zink_query.h"

#include "zink_context.h"
#include "zink_clear.h"
#include "zink_program.h"
#include "zink_resource.h"
#include "zink_screen.h"

#include "util/u_dump.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"

#define NUM_QUERIES 500
#define NOWAIT_CHECK_THRESHOLD 10 //prevent spinning

#define ZINK_QUERY_RENDER_PASSES (PIPE_QUERY_DRIVER_SPECIFIC + 0)

struct zink_query_pool {
   struct list_head list;
   VkQueryType vk_query_type;
   VkQueryPipelineStatisticFlags pipeline_stats;
   VkQueryPool query_pool;
   unsigned last_range;
   unsigned refcount;
};

struct zink_query_buffer {
   struct list_head list;
   unsigned num_results;
   struct pipe_resource *buffers[PIPE_MAX_VERTEX_STREAMS];
};

struct zink_vk_query {
   struct zink_query_pool *pool;
   unsigned query_id;
   bool needs_reset;
   bool started;
   uint32_t refcount;
};

struct zink_query_start {
   union {
      struct {
         bool have_gs;
         bool have_xfb;
         bool was_line_loop;
      };
      uint32_t data;
   };
   struct zink_vk_query *vkq[PIPE_MAX_VERTEX_STREAMS];
};

struct zink_query {
   struct threaded_query base;
   enum pipe_query_type type;

   /* Every time the gallium query needs
    * another vulkan query, add a new start.
    */
   struct util_dynarray starts;
   unsigned start_offset;

   VkQueryType vkqtype;
   unsigned index;
   bool precise;

   bool active; /* query is considered active by vk */
   bool needs_reset; /* query is considered active by vk and cannot be destroyed */
   bool dead; /* query should be destroyed when its fence finishes */
   bool needs_update; /* query needs to update its qbos */
   bool needs_rast_discard_workaround; /* query needs discard disabled */
   bool suspended;
   bool started_in_rp; //needs to be stopped in rp

   struct list_head active_list;

   struct list_head stats_list; /* when active, statistics queries are added to ctx->primitives_generated_queries */
   bool has_draws; /* have_gs and have_xfb are valid for idx=curr_query */

   struct zink_batch_usage *batch_uses; //batch that the query was started in
   unsigned result_check_counter; //incremented for nowait checks

   struct list_head buffers;
   unsigned buffer_count;
   union {
      struct zink_query_buffer *curr_qbo;
      struct pipe_fence_handle *fence; //PIPE_QUERY_GPU_FINISHED
   };

   struct zink_resource *predicate;
   bool predicate_dirty;
};

static const struct pipe_driver_query_info zink_specific_queries[] = {
   {"render-passes", ZINK_QUERY_RENDER_PASSES, { 0 }},
};

static inline int
get_num_starts(struct zink_query *q)
{
   return util_dynarray_num_elements(&q->starts, struct zink_query_start);
}

static void
update_query_id(struct zink_context *ctx, struct zink_query *q);

static VkQueryPipelineStatisticFlags
pipeline_statistic_convert(enum pipe_statistics_query_index idx)
{
   unsigned map[] = {
      [PIPE_STAT_QUERY_IA_VERTICES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT,
      [PIPE_STAT_QUERY_IA_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT,
      [PIPE_STAT_QUERY_VS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_GS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_GS_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT,
      [PIPE_STAT_QUERY_C_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_C_PRIMITIVES] = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT,
      [PIPE_STAT_QUERY_PS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_HS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT,
      [PIPE_STAT_QUERY_DS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_CS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT
   };
   assert(idx < ARRAY_SIZE(map));
   return map[idx];
}

static void
begin_vk_query_indexed(struct zink_context *ctx, struct zink_vk_query *vkq, int index,
                       VkQueryControlFlags flags)
{
   if (!vkq->started) {
      VKCTX(CmdBeginQueryIndexedEXT)(ctx->bs->cmdbuf,
                                     vkq->pool->query_pool,
                                     vkq->query_id,
                                     flags,
                                     index);
      vkq->started = true;
   }
}

static void
end_vk_query_indexed(struct zink_context *ctx, struct zink_vk_query *vkq, int index)
{
   if (vkq->started) {
      VKCTX(CmdEndQueryIndexedEXT)(ctx->bs->cmdbuf,
                                   vkq->pool->query_pool,
                                   vkq->query_id, index);
      vkq->started = false;
   }
}

static void
reset_vk_query_pool(struct zink_context *ctx, struct zink_vk_query *vkq)
{
   if (vkq->needs_reset) {
      VKCTX(CmdResetQueryPool)(ctx->bs->reordered_cmdbuf, vkq->pool->query_pool, vkq->query_id, 1);
      ctx->bs->has_reordered_work = true;
   }
   vkq->needs_reset = false;
}

void
zink_context_destroy_query_pools(struct zink_context *ctx)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   list_for_each_entry_safe(struct zink_query_pool, pool, &ctx->query_pools, list) {
      VKSCR(DestroyQueryPool)(screen->dev, pool->query_pool, NULL);
      list_del(&pool->list);
      FREE(pool);
   }
}

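/* Find an existing pool matching the vulkan query type this gallium query needs
 * (and, for VK_QUERY_TYPE_PIPELINE_STATISTICS, the exact statistic flags), or
 * create a new NUM_QUERIES-sized pool and append it to ctx->query_pools.
 * For emulated PIPE_QUERY_PRIMITIVES_GENERATED, idx==1 selects the transform
 * feedback pool instead of the pipeline statistics pool.
 */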
static struct zink_query_pool *
find_or_allocate_qp(struct zink_context *ctx, struct zink_query *q, unsigned idx)
{
   VkQueryPipelineStatisticFlags pipeline_stats = 0;
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      pipeline_stats = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
                       VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT;
   else if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE)
      pipeline_stats = pipeline_statistic_convert(q->index);

   VkQueryType vk_query_type = q->vkqtype;
   /* if xfb is active, we need to use an xfb query, otherwise we need pipeline statistics */
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && idx == 1) {
      vk_query_type = VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
      pipeline_stats = 0;
   }

   struct zink_screen *screen = zink_screen(ctx->base.screen);
   list_for_each_entry(struct zink_query_pool, pool, &ctx->query_pools, list) {
      if (pool->vk_query_type == vk_query_type) {
         if (vk_query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
            if (pool->pipeline_stats == pipeline_stats)
               return pool;
         } else
            return pool;
      }
   }

   struct zink_query_pool *new_pool = CALLOC_STRUCT(zink_query_pool);
   if (!new_pool)
      return NULL;

   new_pool->vk_query_type = vk_query_type;
   new_pool->pipeline_stats = pipeline_stats;

   VkQueryPoolCreateInfo pool_create = {0};
   pool_create.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
   pool_create.queryType = vk_query_type;
   pool_create.queryCount = NUM_QUERIES;
   pool_create.pipelineStatistics = pipeline_stats;

   VkResult status = VKSCR(CreateQueryPool)(screen->dev, &pool_create, NULL, &new_pool->query_pool);
   if (status != VK_SUCCESS) {
      mesa_loge("ZINK: vkCreateQueryPool failed (%s)", vk_Result_to_str(status));
      FREE(new_pool);
      return NULL;
   }

   list_addtail(&new_pool->list, &ctx->query_pools);
   return new_pool;
}

static void
update_qbo(struct zink_context *ctx, struct zink_query *q);
static void
reset_qbos(struct zink_context *ctx, struct zink_query *q);

static bool
is_emulated_primgen(const struct zink_query *q)
{
   return q->type == PIPE_QUERY_PRIMITIVES_GENERATED &&
          q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT;
}

static inline unsigned
get_num_query_pools(struct zink_query *q)
{
   if (is_emulated_primgen(q))
      return 2;
   return 1;
}

static inline unsigned
get_num_queries(struct zink_query *q)
{
   if (is_emulated_primgen(q))
      return 2;
   if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      return PIPE_MAX_VERTEX_STREAMS;
   return 1;
}

static inline unsigned
get_num_results(struct zink_query *q)
{
   if (q->type < PIPE_QUERY_DRIVER_SPECIFIC &&
       q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      return 1;
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      return 1;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      return 2;
   default:
      debug_printf("unknown query: %s\n",
                   util_str_query_type(q->type, true));
      unreachable("zink: unknown query type");
   }
}

static void
timestamp_to_nanoseconds(struct zink_screen *screen, uint64_t *timestamp)
{
   /* The number of valid bits in a timestamp value is determined by
    * the VkQueueFamilyProperties::timestampValidBits property of the queue on which the timestamp is written.
    * - 17.5. Timestamp Queries
    */
   if (screen->timestamp_valid_bits < 64)
      *timestamp &= (1ull << screen->timestamp_valid_bits) - 1;

   /* The number of nanoseconds it takes for a timestamp value to be incremented by 1
    * can be obtained from VkPhysicalDeviceLimits::timestampPeriod
    * - 17.5. Timestamp Queries
    */
   *timestamp *= (double)screen->info.props.limits.timestampPeriod;
}

static VkQueryType
convert_query_type(struct zink_screen *screen, enum pipe_query_type query_type, bool *precise)
{
   *precise = false;
   switch (query_type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      *precise = true;
      FALLTHROUGH;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      return VK_QUERY_TYPE_OCCLUSION;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
      return VK_QUERY_TYPE_TIMESTAMP;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      return screen->info.have_EXT_primitives_generated_query ?
             VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT :
             VK_QUERY_TYPE_PIPELINE_STATISTICS;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      return VK_QUERY_TYPE_PIPELINE_STATISTICS;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      return VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT;
   default:
      debug_printf("unknown query: %s\n",
                   util_str_query_type(query_type, true));
      unreachable("zink: unknown query type");
   }
}

static bool
needs_stats_list(struct zink_query *query)
{
   return is_emulated_primgen(query) ||
          query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
          query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
}

static bool
is_time_query(struct zink_query *query)
{
   return query->type == PIPE_QUERY_TIMESTAMP || query->type == PIPE_QUERY_TIME_ELAPSED;
}

static bool
is_so_overflow_query(struct zink_query *query)
{
   return query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || query->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE;
}

static bool
is_bool_query(struct zink_query *query)
{
   return is_so_overflow_query(query) ||
          query->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
          query->type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE ||
          query->type == PIPE_QUERY_GPU_FINISHED;
}

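/* Append a fresh zink_query_buffer to the query's buffer list: one staging
 * buffer per vulkan sub-query, each sized to hold every result a pool can
 * produce (or a single result for timestamp queries).
 */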
static bool
qbo_append(struct pipe_screen *screen, struct zink_query *query)
{
   if (query->curr_qbo && query->curr_qbo->list.next)
      return true;
   struct zink_query_buffer *qbo = CALLOC_STRUCT(zink_query_buffer);
   if (!qbo)
      return false;
   int num_buffers = get_num_queries(query);

   for (unsigned i = 0; i < num_buffers; i++) {
      qbo->buffers[i] = pipe_buffer_create(screen, PIPE_BIND_QUERY_BUFFER,
                                           PIPE_USAGE_STAGING,
                                           /* this is the maximum possible size of the results in a given buffer */
                                           (query->type == PIPE_QUERY_TIMESTAMP ? 1 : NUM_QUERIES) * get_num_results(query) * sizeof(uint64_t));
      if (!qbo->buffers[i])
         goto fail;
   }
   list_addtail(&qbo->list, &query->buffers);
   query->buffer_count++;

   return true;
fail:
   for (unsigned i = 0; i < num_buffers; i++)
      pipe_resource_reference(&qbo->buffers[i], NULL);
   FREE(qbo);
   return false;
}

static void
unref_vk_pool(struct zink_context *ctx, struct zink_query_pool *pool)
{
   if (!pool || --pool->refcount)
      return;
   util_dynarray_append(&ctx->bs->dead_querypools, VkQueryPool, pool->query_pool);
   if (list_is_linked(&pool->list))
      list_del(&pool->list);
   FREE(pool);
}

static void
unref_vk_query(struct zink_context *ctx, struct zink_vk_query *vkq)
{
   if (!vkq)
      return;
   unref_vk_pool(ctx, vkq->pool);
   vkq->refcount--;
   if (vkq->refcount == 0)
      FREE(vkq);
}

static void
destroy_query(struct zink_context *ctx, struct zink_query *query)
{
   ASSERTED struct zink_screen *screen = zink_screen(ctx->base.screen);
   assert(zink_screen_usage_check_completion(screen, query->batch_uses));
   struct zink_query_buffer *qbo, *next;

   struct zink_query_start *starts = query->starts.data;
   unsigned num_starts = query->starts.capacity / sizeof(struct zink_query_start);
   for (unsigned j = 0; j < num_starts; j++) {
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         unref_vk_query(ctx, starts[j].vkq[i]);
      }
   }

   util_dynarray_fini(&query->starts);
   LIST_FOR_EACH_ENTRY_SAFE(qbo, next, &query->buffers, list) {
      for (unsigned i = 0; i < ARRAY_SIZE(qbo->buffers); i++)
         pipe_resource_reference(&qbo->buffers[i], NULL);
      FREE(qbo);
   }
   pipe_resource_reference((struct pipe_resource**)&query->predicate, NULL);
   FREE(query);
}

static void
reset_qbo(struct zink_query *q)
{
   q->curr_qbo = list_first_entry(&q->buffers, struct zink_query_buffer, list);
   q->curr_qbo->num_results = 0;
}

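/* Allocate (or, for timestamp queries, reuse) a zink_query_start and assign
 * each sub-query a fresh vulkan query id from a compatible pool. Currently
 * active xfb queries are shared through ctx->curr_xfb_queries; an exhausted
 * pool is unlinked so that find_or_allocate_qp() creates a replacement.
 */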
443 static void
query_pool_get_range(struct zink_context * ctx,struct zink_query * q)444 query_pool_get_range(struct zink_context *ctx, struct zink_query *q)
445 {
446 bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP;
447 struct zink_query_start *start;
448 int num_queries = get_num_queries(q);
449 if (!is_timestamp || get_num_starts(q) == 0) {
450 size_t size = q->starts.capacity;
451 start = util_dynarray_grow(&q->starts, struct zink_query_start, 1);
452 if (size != q->starts.capacity) {
453 /* when resizing, always zero the new data to avoid garbage */
454 uint8_t *data = q->starts.data;
455 memset(data + size, 0, q->starts.capacity - size);
456 }
457 } else {
458 start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
459 }
460 start->data = 0;
461
462 unsigned num_pools = get_num_query_pools(q);
463 for (unsigned i = 0; i < num_queries; i++) {
464 int pool_idx = num_pools > 1 ? i : 0;
465 /* try and find the active query for this */
466 struct zink_vk_query *vkq;
467 int xfb_idx = num_queries == 4 ? i : q->index;
468 if ((q->vkqtype == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT ||
469 (pool_idx == 1)) && ctx->curr_xfb_queries[xfb_idx]) {
470 vkq = ctx->curr_xfb_queries[xfb_idx];
471 vkq->refcount++;
472 vkq->pool->refcount++;
473 } else {
474 struct zink_query_pool *pool = find_or_allocate_qp(ctx, q, pool_idx);
475 if (pool->last_range == NUM_QUERIES) {
476 list_del(&pool->list);
477 pool = find_or_allocate_qp(ctx, q, pool_idx);
478 }
479 vkq = CALLOC_STRUCT(zink_vk_query);
480 if (!vkq) {
481 mesa_loge("ZINK: failed to allocate vkq!");
482 return;
483 }
484
485 pool->refcount++;
486 vkq->refcount = 1;
487 vkq->needs_reset = true;
488 vkq->pool = pool;
489 vkq->started = false;
490 vkq->query_id = pool->last_range++;
491 }
492 unref_vk_query(ctx, start->vkq[i]);
493 start->vkq[i] = vkq;
494 }
495 }
496
static struct pipe_query *
zink_create_query(struct pipe_context *pctx,
                  unsigned query_type, unsigned index)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = CALLOC_STRUCT(zink_query);

   if (!query)
      return NULL;
   list_inithead(&query->buffers);

   query->index = index;
   query->type = query_type;

   if (query->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return (struct pipe_query *)query;

   if (query->type == PIPE_QUERY_GPU_FINISHED || query->type == PIPE_QUERY_TIMESTAMP_DISJOINT)
      return (struct pipe_query *)query;
   query->vkqtype = convert_query_type(screen, query_type, &query->precise);
   if (query->vkqtype == -1)
      return NULL;

   util_dynarray_init(&query->starts, NULL);

   assert(!query->precise || query->vkqtype == VK_QUERY_TYPE_OCCLUSION);

   /* use emulated path for drivers without full support */
   if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT && index &&
       !screen->info.primgen_feats.primitivesGeneratedQueryWithNonZeroStreams)
      query->vkqtype = VK_QUERY_TYPE_PIPELINE_STATISTICS;

   if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      query->needs_rast_discard_workaround = !screen->info.primgen_feats.primitivesGeneratedQueryWithRasterizerDiscard;
   } else if (query_type == PIPE_QUERY_PRIMITIVES_GENERATED) {
      query->needs_rast_discard_workaround = true;
   }

   if (!qbo_append(pctx->screen, query))
      goto fail;
   ctx->bs->has_work = true;
   query->needs_reset = true;
   query->predicate_dirty = true;
   if (query->type == PIPE_QUERY_TIMESTAMP) {
      query->active = true;
      /* defer pool reset until end_query since we're guaranteed to be threadsafe then */
      reset_qbo(query);
   }
   return (struct pipe_query *)query;
fail:
   destroy_query(zink_context(pctx), query);
   return NULL;
}

static void
zink_destroy_query(struct pipe_context *pctx,
                   struct pipe_query *q)
{
   struct zink_query *query = (struct zink_query *)q;

   /* only destroy if this query isn't active on any batches,
    * otherwise just mark dead and wait
    */
   if (query->batch_uses) {
      query->dead = true;
      return;
   }

   destroy_query(zink_context(pctx), query);
}

void
zink_prune_query(struct zink_batch_state *bs, struct zink_query *query)
{
   if (!zink_batch_usage_matches(query->batch_uses, bs))
      return;
   query->batch_uses = NULL;
   if (query->dead)
      destroy_query(bs->ctx, query);
}

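/* Accumulate the raw 64bit values from a mapped qbo into the gallium result,
 * one entry per query start, applying per-type semantics: timestamp deltas
 * are summed, emulated primgen picks the GS or clipping statistic as
 * appropriate, and xfb overflow compares numPrimitivesWritten against
 * numPrimitivesNeeded.
 */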
static void
check_query_results(struct zink_query *query, union pipe_query_result *result,
                    int num_starts, uint64_t *results, uint64_t *xfb_results)
{
   uint64_t last_val = 0;
   int result_size = get_num_results(query);
   int idx = 0;
   util_dynarray_foreach(&query->starts, struct zink_query_start, start) {
      unsigned i = idx * result_size;
      idx++;
      switch (query->type) {
      case PIPE_QUERY_OCCLUSION_PREDICATE:
      case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      case PIPE_QUERY_GPU_FINISHED:
         result->b |= results[i] != 0;
         break;

      case PIPE_QUERY_TIME_ELAPSED:
      case PIPE_QUERY_TIMESTAMP:
         /* the application can sum the differences between all N queries to determine the total execution time.
          * - 17.5. Timestamp Queries
          */
         if (query->type != PIPE_QUERY_TIME_ELAPSED || i)
            result->u64 += results[i] - last_val;
         last_val = results[i];
         break;
      case PIPE_QUERY_OCCLUSION_COUNTER:
         result->u64 += results[i];
         break;
      case PIPE_QUERY_PRIMITIVES_GENERATED:
         if (query->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
            result->u64 += results[i];
         else if (start->have_xfb || query->index)
            result->u64 += xfb_results[i + 1];
         else
            /* if a given draw had a geometry shader, we need to use the first result */
            result->u64 += results[i + !start->have_gs];
         break;
      case PIPE_QUERY_PRIMITIVES_EMITTED:
         /* A query pool created with this type will capture 2 integers -
          * numPrimitivesWritten and numPrimitivesNeeded -
          * for the specified vertex stream output from the last vertex processing stage.
          * - from VK_EXT_transform_feedback spec
          */
         result->u64 += results[i];
         break;
      case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         /* A query pool created with this type will capture 2 integers -
          * numPrimitivesWritten and numPrimitivesNeeded -
          * for the specified vertex stream output from the last vertex processing stage.
          * - from VK_EXT_transform_feedback spec
          */
         if (start->have_xfb)
            result->b |= results[i] != results[i + 1];
         break;
      case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
         switch (query->index) {
         case PIPE_STAT_QUERY_IA_VERTICES:
            result->u64 += start->was_line_loop ? results[i] / 2 : results[i];
            break;
         default:
            result->u64 += results[i];
            break;
         }
         break;

      default:
         debug_printf("unhandled query type: %s\n",
                      util_str_query_type(query->type, true));
         unreachable("unexpected query type");
      }
   }
}

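/* Map every qbo owned by the query and accumulate its results; when 'wait' is
 * not set, the ZINK_MAP_QBO flag is added so the map can take the driver's
 * non-blocking qbo path instead of stalling on unfinished work.
 */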
static bool
get_query_result(struct pipe_context *pctx,
                 struct pipe_query *q,
                 bool wait,
                 union pipe_query_result *result)
{
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query *)q;
   unsigned flags = PIPE_MAP_READ;

   if (!wait)
      flags |= ZINK_MAP_QBO;
   if (query->base.flushed)
      /* this is not a context-safe operation; ensure map doesn't use slab alloc */
      flags |= PIPE_MAP_THREAD_SAFE;

   util_query_clear_result(result, query->type);

   int num_starts = get_num_starts(query);
   /* no results: return zero */
   if (!num_starts)
      return true;
   int result_size = get_num_results(query) * sizeof(uint64_t);
   int num_maps = get_num_queries(query);

   struct zink_query_buffer *qbo;
   struct pipe_transfer *xfer[PIPE_MAX_VERTEX_STREAMS] = { 0 };
   LIST_FOR_EACH_ENTRY(qbo, &query->buffers, list) {
      uint64_t *results[PIPE_MAX_VERTEX_STREAMS] = { NULL, NULL };
      bool is_timestamp = query->type == PIPE_QUERY_TIMESTAMP;
      if (!qbo->num_results)
         continue;

      for (unsigned i = 0; i < num_maps; i++) {
         results[i] = pipe_buffer_map_range(pctx, qbo->buffers[i], 0,
                                            (is_timestamp ? 1 : qbo->num_results) * result_size, flags, &xfer[i]);
         if (!results[i]) {
            if (wait)
               debug_printf("zink: qbo read failed!");
            goto fail;
         }
      }
      if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
         for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS && !result->b; i++) {
            check_query_results(query, result, num_starts, results[i], NULL);
         }
      } else
         check_query_results(query, result, num_starts, results[0], results[1]);

      for (unsigned i = 0 ; i < num_maps; i++)
         pipe_buffer_unmap(pctx, xfer[i]);

      /* if overflow is detected we can stop */
      if (query->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE && result->b)
         break;
   }

   if (is_time_query(query))
      timestamp_to_nanoseconds(screen, &result->u64);

   return true;
fail:
   for (unsigned i = 0 ; i < num_maps; i++)
      if (xfer[i])
         pipe_buffer_unmap(pctx, xfer[i]);
   return false;
}

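/* Resolve the full query result on the CPU (blocking until it's available)
 * and write it into the target buffer with tc_buffer_write, clamped to the
 * requested result type; fallback for results that can't be copied on the GPU.
 */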
static void
force_cpu_read(struct zink_context *ctx, struct pipe_query *pquery, enum pipe_query_value_type result_type, struct pipe_resource *pres, unsigned offset)
{
   struct pipe_context *pctx = &ctx->base;
   unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
   struct zink_query *query = (struct zink_query*)pquery;
   union pipe_query_result result = {0};

   if (query->needs_update)
      update_qbo(ctx, query);

   bool success = get_query_result(pctx, pquery, true, &result);
   if (!success) {
      debug_printf("zink: getting query result failed\n");
      return;
   }

   if (result_type <= PIPE_QUERY_TYPE_U32) {
      uint32_t u32;
      uint32_t limit;
      if (result_type == PIPE_QUERY_TYPE_I32)
         limit = INT_MAX;
      else
         limit = UINT_MAX;
      if (is_bool_query(query))
         u32 = result.b;
      else
         u32 = MIN2(limit, result.u64);
      tc_buffer_write(pctx, pres, offset, result_size, &u32);
   } else {
      uint64_t u64;
      if (is_bool_query(query))
         u64 = result.b;
      else
         u64 = result.u64;
      tc_buffer_write(pctx, pres, offset, result_size, &u64);
   }
}

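/* Record a vkCmdCopyQueryPoolResults that writes 'num_results' consecutive
 * results from 'pool' starting at 'query_id' into 'res' at 'offset', ending
 * any active renderpass and updating buffer usage/barrier state first.
 */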
static void
copy_pool_results_to_buffer(struct zink_context *ctx, struct zink_query *query, VkQueryPool pool,
                            unsigned query_id, struct zink_resource *res, unsigned offset,
                            int num_results, VkQueryResultFlags flags)
{
   unsigned type_size = (flags & VK_QUERY_RESULT_64_BIT) ? sizeof(uint64_t) : sizeof(uint32_t);
   unsigned base_result_size = get_num_results(query) * type_size;
   unsigned result_size = base_result_size * num_results;
   if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
      result_size += type_size;

   bool marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "update_qbo(%s: id=%u, num_results=%d)", vk_QueryType_to_str(query->vkqtype), query_id, num_results);

   zink_batch_no_rp(ctx);
   /* if it's a single query that doesn't need special handling, we can copy it and be done */
   zink_batch_reference_resource_rw(ctx, res, true);
   res->obj->access = VK_ACCESS_TRANSFER_WRITE_BIT;
   res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFER_BIT;
   util_range_add(&res->base.b, &res->valid_buffer_range, offset, offset + result_size);
   assert(query_id < NUM_QUERIES);
   res->obj->unordered_read = res->obj->unordered_write = false;
   ctx->bs->has_work = true;
   VKCTX(CmdCopyQueryPoolResults)(ctx->bs->cmdbuf, pool, query_id, num_results, res->obj->buffer,
                                  offset, base_result_size, flags);
   zink_cmd_debug_marker_end(ctx, ctx->bs->cmdbuf, marker);
}

static void
copy_results_to_buffer(struct zink_context *ctx, struct zink_query *query, struct zink_resource *res, unsigned offset, int num_results, VkQueryResultFlags flags)
{
   struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
   copy_pool_results_to_buffer(ctx, query, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id, res, offset, num_results, flags);
}

static void
reset_query_range(struct zink_context *ctx, struct zink_query *q)
{
   int num_queries = get_num_queries(q);
   struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
   for (unsigned i = 0; i < num_queries; i++) {
      reset_vk_query_pool(ctx, start->vkq[i]);
   }
}

static void
reset_qbos(struct zink_context *ctx, struct zink_query *q)
{
   if (q->needs_update)
      update_qbo(ctx, q);

   q->needs_reset = false;
   /* create new qbo for non-timestamp queries:
    * timestamp queries should never need more than 2 entries in the qbo
    */
   if (q->type == PIPE_QUERY_TIMESTAMP)
      return;
   if (qbo_append(ctx->base.screen, q))
      reset_qbo(q);
   else
      debug_printf("zink: qbo alloc failed on reset!");
}

static inline unsigned
get_buffer_offset(struct zink_query *q)
{
   return (get_num_starts(q) - 1) * get_num_results(q) * sizeof(uint64_t);
}

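/* Flush all pending vulkan query results into the current qbo: for each
 * sub-query, walk the starts and batch runs of consecutive query ids from the
 * same pool into a single vkCmdCopyQueryPoolResults.
 */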
static void
update_qbo(struct zink_context *ctx, struct zink_query *q)
{
   struct zink_query_buffer *qbo = q->curr_qbo;
   unsigned num_starts = get_num_starts(q);
   struct zink_query_start *starts = q->starts.data;
   bool is_timestamp = q->type == PIPE_QUERY_TIMESTAMP;
   /* timestamp queries just write to offset 0 always */
   int num_queries = get_num_queries(q);
   unsigned num_results = qbo->num_results;
   for (unsigned i = 0; i < num_queries; i++) {
      unsigned start_offset = q->start_offset;
      while (start_offset < num_starts) {
         unsigned num_merged_copies = 0;
         VkQueryPool qp = starts[start_offset].vkq[i]->pool->query_pool;
         unsigned base_id = starts[start_offset].vkq[i]->query_id;
         /* iterate over all the starts to see how many can be merged */
         for (unsigned j = start_offset; j < num_starts; j++, num_merged_copies++) {
            if (starts[j].vkq[i]->pool->query_pool != qp || starts[j].vkq[i]->query_id != base_id + num_merged_copies)
               break;
         }
         assert(num_merged_copies);
         unsigned cur_offset = start_offset * get_num_results(q) * sizeof(uint64_t);
         unsigned offset = is_timestamp ? 0 : cur_offset;
         copy_pool_results_to_buffer(ctx, q, starts[start_offset].vkq[i]->pool->query_pool, starts[start_offset].vkq[i]->query_id,
                                     zink_resource(qbo->buffers[i]),
                                     offset,
                                     num_merged_copies,
                                     /* there is an implicit execution dependency from
                                      * each such query command to all query commands previously submitted to the same queue. There
                                      * is one significant exception to this; if the flags parameter of vkCmdCopyQueryPoolResults does not
                                      * include VK_QUERY_RESULT_WAIT_BIT, execution of vkCmdCopyQueryPoolResults may happen-before
                                      * the results of vkCmdEndQuery are available.
                                      * - Chapter 18. Queries
                                      */
                                     VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
         if (!is_timestamp)
            q->curr_qbo->num_results += num_merged_copies;
         start_offset += num_merged_copies;
      }
   }
   q->start_offset += q->curr_qbo->num_results - num_results;

   if (is_timestamp)
      q->curr_qbo->num_results = 1;

   q->needs_update = false;
}

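/* Start the vulkan queries backing a gallium query on the current batch:
 * grab fresh query ids, reset them if necessary, then emit the matching
 * begin command (indexed xfb, primgen, or plain vkCmdBeginQuery).
 * CS invocation statistics queries can't start inside a renderpass and are
 * deferred to the suspended list instead.
 */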
static void
begin_query(struct zink_context *ctx, struct zink_query *q)
{
   VkQueryControlFlags flags = 0;

   if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT || q->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return;

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && q->index == PIPE_STAT_QUERY_CS_INVOCATIONS && ctx->in_rp) {
      /* refuse to start CS queries in renderpasses */
      if (!list_is_linked(&q->active_list))
         list_addtail(&q->active_list, &ctx->suspended_queries);
      q->suspended = true;
      return;
   }

   update_query_id(ctx, q);
   q->predicate_dirty = true;
   if (q->needs_reset)
      reset_qbos(ctx, q);
   reset_query_range(ctx, q);
   q->active = true;
   ctx->bs->has_work = true;

   struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);
   if (q->type == PIPE_QUERY_TIME_ELAPSED) {
      VKCTX(CmdWriteTimestamp)(ctx->bs->cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);
      if (!ctx->in_rp)
         update_qbo(ctx, q);
      zink_batch_usage_set(&q->batch_uses, ctx->bs);
      _mesa_set_add(&ctx->bs->active_queries, q);
   }
   /* ignore the rest of begin_query for timestamps */
   if (is_time_query(q))
      return;

   /* A query must either begin and end inside the same subpass of a render pass
    * instance, or must both begin and end outside of a render pass instance
    * (i.e. contain entire render pass instances).
    * - 18.2. Query Operation
    */
   q->started_in_rp = ctx->in_rp;

   if (q->precise)
      flags |= VK_QUERY_CONTROL_PRECISE_BIT;

   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
       is_emulated_primgen(q) ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      struct zink_vk_query *vkq = start->vkq[1] ? start->vkq[1] : start->vkq[0];
      assert(!ctx->curr_xfb_queries[q->index] || ctx->curr_xfb_queries[q->index] == vkq);
      ctx->curr_xfb_queries[q->index] = vkq;

      begin_vk_query_indexed(ctx, vkq, q->index, flags);
   } else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         assert(!ctx->curr_xfb_queries[i] || ctx->curr_xfb_queries[i] == start->vkq[i]);
         ctx->curr_xfb_queries[i] = start->vkq[i];

         begin_vk_query_indexed(ctx, start->vkq[i], i, flags);
      }
   } else if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      begin_vk_query_indexed(ctx, start->vkq[0], q->index, flags);
   }
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT && q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT)
      VKCTX(CmdBeginQuery)(ctx->bs->cmdbuf, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id, flags);
   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && q->index == PIPE_STAT_QUERY_IA_VERTICES) {
      assert(!ctx->vertices_query);
      ctx->vertices_query = q;
   }
   if (needs_stats_list(q))
      list_addtail(&q->stats_list, &ctx->primitives_generated_queries);
   zink_batch_usage_set(&q->batch_uses, ctx->bs);
   _mesa_set_add(&ctx->bs->active_queries, q);
   if (q->needs_rast_discard_workaround) {
      ctx->primitives_generated_active = true;
      if (zink_set_rasterizer_discard(ctx, true))
         zink_set_null_fs(ctx);
   }
}

static bool
zink_begin_query(struct pipe_context *pctx,
                 struct pipe_query *q)
{
   struct zink_query *query = (struct zink_query *)q;
   struct zink_context *ctx = zink_context(pctx);

   /* drop all past results */
   reset_qbo(query);

   if (query->type < PIPE_QUERY_DRIVER_SPECIFIC && query->vkqtype == VK_QUERY_TYPE_OCCLUSION)
      ctx->occlusion_query_active = true;
   if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      ctx->fs_query_active = true;

   query->predicate_dirty = true;

   util_dynarray_clear(&query->starts);
   query->start_offset = 0;

   if (ctx->in_rp || (query->type == PIPE_QUERY_TIME_ELAPSED)) {
      begin_query(ctx, query);
   } else {
      /* never directly start queries out of renderpass, always defer */
      list_addtail(&query->active_list, &ctx->suspended_queries);
      query->suspended = true;
      if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
         ctx->primitives_generated_suspended = query->needs_rast_discard_workaround;
   }

   return true;
}

static void
update_query_id(struct zink_context *ctx, struct zink_query *q)
{
   query_pool_get_range(ctx, q);
   ctx->bs->has_work = true;
   q->has_draws = false;
}

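/* Counterpart to begin_query(): emit the matching end command for every
 * vulkan query in the most recent start, clear xfb/vertices tracking, and
 * mark the query as needing a qbo update.
 */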
static void
end_query(struct zink_context *ctx, struct zink_query *q)
{
   if (q->type == PIPE_QUERY_TIMESTAMP_DISJOINT || q->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return;

   ASSERTED struct zink_query_buffer *qbo = q->curr_qbo;
   assert(qbo);
   assert(!is_time_query(q));
   q->active = false;
   assert(q->started_in_rp == ctx->in_rp);
   struct zink_query_start *start = util_dynarray_top_ptr(&q->starts, struct zink_query_start);

   if (q->type == PIPE_QUERY_PRIMITIVES_EMITTED ||
       is_emulated_primgen(q) ||
       q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
      struct zink_vk_query *vkq = start->vkq[1] ? start->vkq[1] : start->vkq[0];

      end_vk_query_indexed(ctx, vkq, q->index);
      ctx->curr_xfb_queries[q->index] = NULL;
   }
   else if (q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      for (unsigned i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
         end_vk_query_indexed(ctx, start->vkq[i], i);
         ctx->curr_xfb_queries[i] = NULL;
      }
   } else if (q->vkqtype == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) {
      end_vk_query_indexed(ctx, start->vkq[0], q->index);
   }
   if (q->vkqtype != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT &&
       q->vkqtype != VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT && !is_time_query(q))
      VKCTX(CmdEndQuery)(ctx->bs->cmdbuf, start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);

   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_IA_VERTICES)
      ctx->vertices_query = NULL;

   if (needs_stats_list(q))
      list_delinit(&q->stats_list);

   q->needs_update = true;
   if (q->needs_rast_discard_workaround) {
      ctx->primitives_generated_active = false;
      if (zink_set_rasterizer_discard(ctx, false))
         zink_set_null_fs(ctx);
   }
}

static bool
zink_end_query(struct pipe_context *pctx,
               struct pipe_query *q)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_query *query = (struct zink_query *)q;

   if (query->type == PIPE_QUERY_TIMESTAMP_DISJOINT || query->type >= PIPE_QUERY_DRIVER_SPECIFIC)
      return true;

   if (query->type == PIPE_QUERY_GPU_FINISHED) {
      pctx->flush(pctx, &query->fence, PIPE_FLUSH_DEFERRED);
      return true;
   }

   /* FIXME: this can be called from a thread, but it needs to write to the cmdbuf */
   threaded_context_unwrap_sync(pctx);

   if (query->vkqtype == VK_QUERY_TYPE_OCCLUSION)
      ctx->occlusion_query_active = true;
   if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      ctx->fs_query_active = true;

   bool unset_null_fs = query->type == PIPE_QUERY_PRIMITIVES_GENERATED && (ctx->primitives_generated_suspended || ctx->primitives_generated_active);
   if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
      ctx->primitives_generated_suspended = false;

   if (list_is_linked(&query->stats_list))
      list_delinit(&query->stats_list);
   if (query->suspended) {
      list_delinit(&query->active_list);
      query->suspended = false;
   }
   if (is_time_query(query)) {
      update_query_id(ctx, query);
      if (query->needs_reset)
         reset_qbos(ctx, query);
      reset_query_range(ctx, query);
      struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      VKCTX(CmdWriteTimestamp)(ctx->bs->cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                               start->vkq[0]->pool->query_pool, start->vkq[0]->query_id);
      ctx->bs->has_work = true;
      zink_batch_usage_set(&query->batch_uses, ctx->bs);
      _mesa_set_add(&ctx->bs->active_queries, query);
      query->needs_update = true;
   } else if (query->active) {
      /* this should be a tc-optimized query end that doesn't split a renderpass */
      if (!query->started_in_rp)
         zink_batch_no_rp(ctx);
      end_query(ctx, query);
   }

   if (unset_null_fs)
      zink_set_null_fs(ctx);

   return true;
}

static bool
zink_get_query_result(struct pipe_context *pctx,
                      struct pipe_query *q,
                      bool wait,
                      union pipe_query_result *result)
{
   struct zink_query *query = (void*)q;
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);

   if (query->type == PIPE_QUERY_TIMESTAMP_DISJOINT) {
      result->timestamp_disjoint.frequency = screen->info.props.limits.timestampPeriod * 1000000.0;
      result->timestamp_disjoint.disjoint = false;
      return true;
   }

   if (query->type == PIPE_QUERY_GPU_FINISHED) {
      struct pipe_screen *pscreen = pctx->screen;

      result->b = pscreen->fence_finish(pscreen, query->base.flushed ? NULL : pctx,
                                        query->fence, wait ? OS_TIMEOUT_INFINITE : 0);
      return result->b;
   }

   if (query->type == ZINK_QUERY_RENDER_PASSES) {
      result->u64 = ctx->hud.render_passes;
      ctx->hud.render_passes = 0;
      return true;
   }

   if (query->needs_update) {
      assert(!ctx->tc || !threaded_query(q)->flushed);
      update_qbo(ctx, query);
   }

   if (zink_batch_usage_is_unflushed(query->batch_uses)) {
      if (!threaded_query(q)->flushed)
         pctx->flush(pctx, NULL, 0);
      if (!wait)
         return false;
   }

   /* TODO: if syncobj/semaphore waits ever get faster delete all this */
   if (!wait && !zink_screen_usage_check_completion_fast(screen, query->batch_uses)) {
      if (query->result_check_counter++ < NOWAIT_CHECK_THRESHOLD)
         return false;
      /* simple queries can use the "fast" path which (probably) avoids directly accessing a syncobj */
      if (query->buffer_count == 1 && get_num_results(query) == 1 && query->type != PIPE_QUERY_TIME_ELAPSED) {
         struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
         unsigned query_id = start->vkq[0]->query_id;
         VkResult ret = VKCTX(GetQueryPoolResults)(screen->dev, start->vkq[0]->pool->query_pool, query_id, 1,
                                                   sizeof(result->u64), &result->u64, 0, VK_QUERY_RESULT_64_BIT);
         if (is_time_query(query))
            timestamp_to_nanoseconds(screen, &result->u64);
         return ret == VK_SUCCESS;
      }
      /* other queries have to check the syncobj */
      if (!zink_screen_usage_check_completion(screen, query->batch_uses))
         return false;
   }

   return get_query_result(pctx, q, wait, result);
}

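/* Stop the vulkan queries for a query that remains logically active so it can
 * be restarted on the next batch; pending results are flushed to the qbo when
 * we're outside a renderpass.
 */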
static void
suspend_query(struct zink_context *ctx, struct zink_query *query)
{
   /* if a query isn't active here then we don't need to reactivate it on the next batch */
   if (query->active && !is_time_query(query))
      end_query(ctx, query);
   if (query->needs_update && !ctx->in_rp)
      update_qbo(ctx, query);
}

static void
suspend_queries(struct zink_context *ctx, bool rp_only)
{
   set_foreach(&ctx->bs->active_queries, entry) {
      struct zink_query *query = (void*)entry->key;
      if (query->suspended || (rp_only && !query->started_in_rp))
         continue;
      if (query->active && !is_time_query(query)) {
         /* the fence is going to steal the set off the batch, so we have to copy
          * the active queries onto a list
          */
         list_addtail(&query->active_list, &ctx->suspended_queries);
         query->suspended = true;
         if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
            ctx->primitives_generated_suspended = query->needs_rast_discard_workaround;
      }
      suspend_query(ctx, query);
   }
}

void
zink_suspend_queries(struct zink_context *ctx)
{
   suspend_queries(ctx, false);
}

void
zink_resume_queries(struct zink_context *ctx)
{
   struct zink_query *query, *next;
   LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
      list_delinit(&query->active_list);
      query->suspended = false;
      if (query->type == PIPE_QUERY_PRIMITIVES_GENERATED)
         ctx->primitives_generated_suspended = false;
      if (query->needs_update && !ctx->in_rp)
         update_qbo(ctx, query);
      begin_query(ctx, query);
   }
}

void
zink_resume_cs_query(struct zink_context *ctx)
{
   struct zink_query *query, *next;
   LIST_FOR_EACH_ENTRY_SAFE(query, next, &ctx->suspended_queries, active_list) {
      if (query->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE && query->index == PIPE_STAT_QUERY_CS_INVOCATIONS) {
         list_delinit(&query->active_list);
         query->suspended = false;
         begin_query(ctx, query);
      }
   }
}

void
zink_query_renderpass_suspend(struct zink_context *ctx)
{
   suspend_queries(ctx, true);
}

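/* Called at draw time: if the GS/xfb/line-loop state that determines how
 * results must be interpreted has changed since the last draw, suspend and
 * resume all active queries so a new start is recorded, then stamp the
 * latest start of each tracked query with the current state.
 */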
void
zink_query_update_gs_states(struct zink_context *ctx)
{
   struct zink_query *query;
   bool suspendall = false;
   bool have_gs = !!ctx->gfx_stages[MESA_SHADER_GEOMETRY];
   bool have_xfb = !!ctx->num_so_targets;

   LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      assert(query->active);
      if (query->has_draws) {
         if (last_start->have_gs != have_gs ||
             last_start->have_xfb != have_xfb) {
            suspendall = true;
         }
      }
   }

   if (ctx->vertices_query) {
      query = ctx->vertices_query;
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      assert(query->active);
      if (last_start->was_line_loop != ctx->was_line_loop) {
         suspendall = true;
      }
   }
   if (suspendall) {
      zink_suspend_queries(ctx);
      zink_resume_queries(ctx);
   }

   LIST_FOR_EACH_ENTRY(query, &ctx->primitives_generated_queries, stats_list) {
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      last_start->have_gs = have_gs;
      last_start->have_xfb = have_xfb;
      query->has_draws = true;
   }
   if (ctx->vertices_query) {
      query = ctx->vertices_query;
      struct zink_query_start *last_start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
      last_start->was_line_loop = ctx->was_line_loop;
      query->has_draws = true;
   }
}

static void
zink_set_active_query_state(struct pipe_context *pctx, bool enable)
{
   struct zink_context *ctx = zink_context(pctx);
   /* unordered blits already disable queries */
   if (ctx->unordered_blitting)
      return;
   ctx->queries_disabled = !enable;

   if (ctx->queries_disabled)
      zink_suspend_queries(ctx);
   else if (ctx->in_rp)
      zink_resume_queries(ctx);
}

void
zink_query_sync(struct zink_context *ctx, struct zink_query *query)
{
   if (query->needs_update)
      update_qbo(ctx, query);
}

void
zink_start_conditional_render(struct zink_context *ctx)
{
   if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || ctx->render_condition.active)
      return;
   VkConditionalRenderingFlagsEXT begin_flags = 0;
   if (ctx->render_condition.inverted)
      begin_flags = VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
   VkConditionalRenderingBeginInfoEXT begin_info = {0};
   begin_info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
   begin_info.buffer = ctx->render_condition.query->predicate->obj->buffer;
   begin_info.flags = begin_flags;
   ctx->render_condition.query->predicate->obj->unordered_read = false;
   VKCTX(CmdBeginConditionalRenderingEXT)(ctx->bs->cmdbuf, &begin_info);
   zink_batch_reference_resource_rw(ctx, ctx->render_condition.query->predicate, false);
   ctx->render_condition.active = true;
}

void
zink_stop_conditional_render(struct zink_context *ctx)
{
   zink_clear_apply_conditionals(ctx);
   if (unlikely(!zink_screen(ctx->base.screen)->info.have_EXT_conditional_rendering) || !ctx->render_condition.active)
      return;
   VKCTX(CmdEndConditionalRenderingEXT)(ctx->bs->cmdbuf);
   ctx->render_condition.active = false;
}

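/* Set or clear (pquery == NULL) the conditional rendering predicate: the
 * query result is copied into a 64bit predicate buffer, either directly on
 * the GPU or via CPU readback for emulated/boolean queries, and conditional
 * rendering begins immediately if a renderpass is active.
 */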
static void
zink_render_condition(struct pipe_context *pctx,
                      struct pipe_query *pquery,
                      bool condition,
                      enum pipe_render_cond_flag mode)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_query *query = (struct zink_query *)pquery;
   zink_batch_no_rp(ctx);
   VkQueryResultFlagBits flags = 0;

   ctx->bs->has_work = true;
   if (query == NULL) {
      /* force conditional clears if they exist */
      if (ctx->clears_enabled && !ctx->in_rp)
         zink_batch_rp(ctx);
      zink_stop_conditional_render(ctx);
      ctx->render_condition_active = false;
      ctx->render_condition.query = NULL;
      return;
   }

   if (!query->predicate) {
      struct pipe_resource *pres;

      /* need to create a vulkan buffer to copy the data into */
      pres = pipe_buffer_create(pctx->screen, PIPE_BIND_QUERY_BUFFER, PIPE_USAGE_DEFAULT, sizeof(uint64_t));
      if (!pres)
         return;

      query->predicate = zink_resource(pres);
   }
   if (query->predicate_dirty) {
      struct zink_resource *res = query->predicate;

      if (mode == PIPE_RENDER_COND_WAIT || mode == PIPE_RENDER_COND_BY_REGION_WAIT)
         flags |= VK_QUERY_RESULT_WAIT_BIT;

      flags |= VK_QUERY_RESULT_64_BIT;
      int num_results = get_num_starts(query);
      if (num_results) {
         if (!is_emulated_primgen(query) &&
             !is_so_overflow_query(query) &&
             num_results == 1) {
            copy_results_to_buffer(ctx, query, res, 0, num_results, flags);
         } else {
            /* these need special handling */
            force_cpu_read(ctx, pquery, PIPE_QUERY_TYPE_U32, &res->base.b, 0);
         }
      } else {
         uint64_t zero = 0;
         tc_buffer_write(pctx, &res->base.b, 0, sizeof(zero), &zero);
      }
      zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT, VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT);
      query->predicate_dirty = false;
   }
   ctx->render_condition.inverted = condition;
   ctx->render_condition_active = true;
   ctx->render_condition.query = query;
   if (ctx->in_rp)
      zink_start_conditional_render(ctx);
}

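/* pipe_context::get_query_result_resource hook: writes either availability
 * (index == -1) or the accumulated result into 'pres' at 'offset', preferring
 * GPU copies (vkCmdCopyQueryPoolResults or a direct qbo copy) and falling
 * back to force_cpu_read() when results from multiple starts must be merged.
 */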
static void
zink_get_query_result_resource(struct pipe_context *pctx,
                               struct pipe_query *pquery,
                               enum pipe_query_flags flags,
                               enum pipe_query_value_type result_type,
                               int index,
                               struct pipe_resource *pres,
                               unsigned offset)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_query *query = (struct zink_query*)pquery;
   struct zink_resource *res = zink_resource(pres);
   unsigned result_size = result_type <= PIPE_QUERY_TYPE_U32 ? sizeof(uint32_t) : sizeof(uint64_t);
   VkQueryResultFlagBits size_flags = result_type <= PIPE_QUERY_TYPE_U32 ? 0 : VK_QUERY_RESULT_64_BIT;
   unsigned num_queries = get_num_starts(query);

   /* it's possible that a query may have no data at all: write out zeroes to the buffer and return */
   uint64_t u64[4] = {0};
   unsigned src_offset = result_size * get_num_results(query);
   if (!num_queries) {
      tc_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
      return;
   }

   if (index == -1) {
      /* VK_QUERY_RESULT_WITH_AVAILABILITY_BIT will ALWAYS write some kind of result data
       * in addition to the availability result, which is a problem if we're just trying to get availability data
       *
       * if we know that there's no valid buffer data in the preceding buffer range, then we can just
       * stomp on it with a glorious queued buffer copy instead of forcing a stall to manually write to the
       * buffer
       */

      VkQueryResultFlags flag = is_time_query(query) ? 0 : VK_QUERY_RESULT_PARTIAL_BIT;
      if (zink_batch_usage_check_completion(ctx, query->batch_uses)) {
         struct zink_query_start *start = util_dynarray_top_ptr(&query->starts, struct zink_query_start);
         unsigned query_id = start->vkq[0]->query_id;
         VkResult result = VKCTX(GetQueryPoolResults)(screen->dev, start->vkq[0]->pool->query_pool, query_id, 1,
                                                      sizeof(u64), u64, 0, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
         if (result == VK_SUCCESS) {
            tc_buffer_write(pctx, pres, offset, result_size, (unsigned char*)u64 + src_offset);
            return;
         } else {
            mesa_loge("ZINK: vkGetQueryPoolResults failed (%s)", vk_Result_to_str(result));
         }
      }
      struct pipe_resource *staging = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_STAGING, src_offset + result_size);
      copy_results_to_buffer(ctx, query, zink_resource(staging), 0, 1, size_flags | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT | flag);
      zink_copy_buffer(ctx, res, zink_resource(staging), offset, result_size * get_num_results(query), result_size);
      pipe_resource_reference(&staging, NULL);
      return;
   }

   /* there is an implicit execution dependency from
    * each such query command to all query commands previously submitted to the same queue. There
    * is one significant exception to this; if the flags parameter of vkCmdCopyQueryPoolResults does not
    * include VK_QUERY_RESULT_WAIT_BIT, execution of vkCmdCopyQueryPoolResults may happen-before
    * the results of vkCmdEndQuery are available.
    * - Chapter 18. Queries
    */
   size_flags |= VK_QUERY_RESULT_WAIT_BIT;
   if (!is_time_query(query) && !is_bool_query(query)) {
      if (num_queries == 1 && !is_emulated_primgen(query) &&
          query->type != PIPE_QUERY_PRIMITIVES_EMITTED &&
          !is_bool_query(query)) {
         if (size_flags == VK_QUERY_RESULT_64_BIT) {
            if (query->needs_update)
               update_qbo(ctx, query);
            /* internal qbo always writes 64bit value so we can just direct copy */
            zink_copy_buffer(ctx, res, zink_resource(query->curr_qbo->buffers[0]), offset,
                             get_buffer_offset(query),
                             result_size);
         } else
            /* have to do a new copy for 32bit */
            copy_results_to_buffer(ctx, query, res, offset, 1, size_flags);
         return;
      }
   }

   /* TODO: use CS to aggregate results */

   /* unfortunately, there's no way to accumulate results from multiple queries on the gpu without either
    * clobbering all but the last result or writing the results sequentially, so we have to manually write the result
    */
   force_cpu_read(ctx, pquery, result_type, pres, offset);
}

uint64_t
zink_get_timestamp(struct pipe_screen *pscreen)
{
   struct zink_screen *screen = zink_screen(pscreen);
   uint64_t timestamp, deviation;
   if (screen->info.have_EXT_calibrated_timestamps) {
      VkCalibratedTimestampInfoEXT cti = {0};
      cti.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT;
      cti.timeDomain = VK_TIME_DOMAIN_DEVICE_EXT;
      VkResult result = VKSCR(GetCalibratedTimestampsEXT)(screen->dev, 1, &cti, &timestamp, &deviation);
      if (result != VK_SUCCESS) {
         mesa_loge("ZINK: vkGetCalibratedTimestampsEXT failed (%s)", vk_Result_to_str(result));
      }
   } else {
      zink_screen_lock_context(screen);
      struct pipe_context *pctx = &screen->copy_context->base;
      struct pipe_query *pquery = pctx->create_query(pctx, PIPE_QUERY_TIMESTAMP, 0);
      if (!pquery)
         return 0;
      union pipe_query_result result = {0};
      pctx->begin_query(pctx, pquery);
      pctx->end_query(pctx, pquery);
      pctx->get_query_result(pctx, pquery, true, &result);
      pctx->destroy_query(pctx, pquery);
      zink_screen_unlock_context(screen);
      timestamp = result.u64;
   }
   timestamp_to_nanoseconds(screen, &timestamp);
   return timestamp;
}

void
zink_context_query_init(struct pipe_context *pctx)
{
   struct zink_context *ctx = zink_context(pctx);
   list_inithead(&ctx->suspended_queries);
   list_inithead(&ctx->primitives_generated_queries);

   pctx->create_query = zink_create_query;
   pctx->destroy_query = zink_destroy_query;
   pctx->begin_query = zink_begin_query;
   pctx->end_query = zink_end_query;
   pctx->get_query_result = zink_get_query_result;
   pctx->get_query_result_resource = zink_get_query_result_resource;
   pctx->set_active_query_state = zink_set_active_query_state;
   pctx->render_condition = zink_render_condition;
}

int
zink_get_driver_query_group_info(struct pipe_screen *pscreen, unsigned index,
                                 struct pipe_driver_query_group_info *info)
{
   if (!info)
      return 1;

   assert(index == 0);
   info->name = "Zink counters";
   info->max_active_queries = ARRAY_SIZE(zink_specific_queries);
   info->num_queries = ARRAY_SIZE(zink_specific_queries);

   return 1;
}

int
zink_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
                           struct pipe_driver_query_info *info)
{
   if (!info)
      return ARRAY_SIZE(zink_specific_queries);

   assert(index < ARRAY_SIZE(zink_specific_queries));
   *info = zink_specific_queries[index];

   return 1;
}