/*
 * Copyright © 2024 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */

#include "util/os_time.h"

#include "nir_builder.h"

#include "vk_log.h"
#include "vk_meta.h"
#include "vk_pipeline.h"

#include "genxml/gen_macros.h"

#include "panvk_buffer.h"
#include "panvk_cmd_buffer.h"
#include "panvk_cmd_meta.h"
#include "panvk_device.h"
#include "panvk_entrypoints.h"
#include "panvk_macros.h"
#include "panvk_query_pool.h"

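/* Device address of the first report of a query: reports live in the pool's
 * main buffer, one query_stride-sized slot per query. */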
static nir_def *
panvk_nir_query_report_dev_addr(nir_builder *b, nir_def *pool_addr,
                                nir_def *query_stride, nir_def *query)
{
   return nir_iadd(b, pool_addr, nir_umul_2x32_64(b, query, query_stride));
}

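/* Device address of a query's availability word: a tightly packed array of
 * 32-bit flags, one per query. */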
static nir_def *
panvk_nir_available_dev_addr(nir_builder *b, nir_def *available_addr,
                             nir_def *query)
{
   nir_def *offset = nir_imul_imm(b, query, sizeof(uint32_t));
   return nir_iadd(b, available_addr, nir_u2u64(b, offset));
}

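/* Emit a Mali WRITE_VALUE job storing an immediate value to a GPU address,
 * queued on the batch's vtc_jc job chain with the barrier flag set so it runs
 * after the jobs already in the chain. */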
static void
panvk_emit_write_job(struct panvk_cmd_buffer *cmd, struct panvk_batch *batch,
                     enum mali_write_value_type type, uint64_t addr,
                     uint64_t value)
{
   struct panfrost_ptr job =
      pan_pool_alloc_desc(&cmd->desc_pool.base, WRITE_VALUE_JOB);

   pan_section_pack(job.cpu, WRITE_VALUE_JOB, PAYLOAD, payload) {
      payload.type = type;
      payload.address = addr;
      payload.immediate_value = value;
   }

   pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_WRITE_VALUE, true, false, 0, 0,
                  &job, false);
}

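/* Helpers to guarantee a batch is open around an operation: open_batch()
 * opens one if nothing is currently recording and reports whether a batch
 * already existed, so close_batch() can restore the previous state. */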
static struct panvk_batch *
open_batch(struct panvk_cmd_buffer *cmd, bool *had_batch)
{
   bool res = cmd->cur_batch != NULL;

   if (!res)
      panvk_per_arch(cmd_open_batch)(cmd);

   *had_batch = res;

   return cmd->cur_batch;
}

static void
close_batch(struct panvk_cmd_buffer *cmd, bool had_batch)
{
   if (!had_batch)
      panvk_per_arch(cmd_close_batch)(cmd);
}

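/* Load a field of a push-constant struct in a meta shader; the load width and
 * offset are derived from the field's C type and position. */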
#define load_info(__b, __type, __field_name)                                   \
   nir_load_push_constant((__b), 1,                                            \
                          sizeof(((__type *)NULL)->__field_name) * 8,          \
                          nir_imm_int(__b, offsetof(__type, __field_name)))

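/* Push constants consumed by the clear-queries shader. The layout must match
 * the panvk_clear_query_push value filled in panvk_emit_clear_queries(). */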
struct panvk_clear_query_push {
   uint64_t pool_addr;
   uint64_t available_addr;
   uint32_t query_stride;
   uint32_t first_query;
   uint32_t query_count;
   uint32_t reports_per_query;
   uint32_t available_value;
};

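/* Clear a single query: store the requested value to its availability word
 * and zero every 64-bit word of its report slots. */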
static void
panvk_nir_clear_query(nir_builder *b, nir_def *i)
{
   nir_def *pool_addr = load_info(b, struct panvk_clear_query_push, pool_addr);
   nir_def *available_addr =
      nir_u2u64(b, load_info(b, struct panvk_clear_query_push, available_addr));
   nir_def *query_stride =
      load_info(b, struct panvk_clear_query_push, query_stride);
   nir_def *first_query =
      load_info(b, struct panvk_clear_query_push, first_query);
   nir_def *reports_per_query =
      load_info(b, struct panvk_clear_query_push, reports_per_query);
   nir_def *avail_value =
      load_info(b, struct panvk_clear_query_push, available_value);

   nir_def *query = nir_iadd(b, first_query, i);

   nir_def *avail_addr = panvk_nir_available_dev_addr(b, available_addr, query);
   nir_def *report_addr =
      panvk_nir_query_report_dev_addr(b, pool_addr, query_stride, query);

   nir_store_global(b, avail_addr, 4, avail_value, 0x1);

   nir_def *zero = nir_imm_int64(b, 0);
   nir_variable *r = nir_local_variable_create(b->impl, glsl_uint_type(), "r");
   nir_store_var(b, r, nir_imm_int(b, 0), 0x1);

   uint32_t qwords_per_report =
      DIV_ROUND_UP(sizeof(struct panvk_query_report), sizeof(uint64_t));

   nir_push_loop(b);
   {
      nir_def *report_idx = nir_load_var(b, r);
      nir_break_if(b, nir_ige(b, report_idx, reports_per_query));

      nir_def *base_addr = nir_iadd(
         b, report_addr,
         nir_i2i64(
            b, nir_imul_imm(b, report_idx, sizeof(struct panvk_query_report))));

      for (uint32_t y = 0; y < qwords_per_report; y++) {
         nir_def *addr = nir_iadd_imm(b, base_addr, y * sizeof(uint64_t));
         nir_store_global(b, addr, 8, zero, 0x1);
      }

      nir_store_var(b, r, nir_iadd_imm(b, report_idx, 1), 0x1);
   }
   nir_pop_loop(b, NULL);
}

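/* Build the compute shader used to clear a range of queries: one invocation
 * per query, bounds-checked against the query count since the dispatch is
 * rounded up to the workgroup size. */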
static nir_shader *
build_clear_queries_shader(uint32_t max_threads_per_wg)
{
   nir_builder build = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, NULL, "panvk-meta-clear-queries");
   nir_builder *b = &build;

   b->shader->info.workgroup_size[0] = max_threads_per_wg;
   nir_def *wg_id = nir_load_workgroup_id(b);
   nir_def *i =
      nir_iadd(b, nir_load_subgroup_invocation(b),
               nir_imul_imm(b, nir_channel(b, wg_id, 0), max_threads_per_wg));

   nir_def *query_count =
      load_info(b, struct panvk_clear_query_push, query_count);
   nir_push_if(b, nir_ilt(b, i, query_count));
   {
      panvk_nir_clear_query(b, i);
   }
   nir_pop_if(b, NULL);

   return build.shader;
}

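/* Create (and cache through vk_meta) the clear-queries compute pipeline. */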
static VkResult
get_clear_queries_pipeline(struct panvk_device *dev, const void *key_data,
                           size_t key_size, VkPipelineLayout layout,
                           VkPipeline *pipeline_out)
{
   const struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(dev->vk.physical);

   const VkPipelineShaderStageNirCreateInfoMESA nir_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA,
      .nir =
         build_clear_queries_shader(phys_dev->kmod.props.max_threads_per_wg),
   };
   const VkComputePipelineCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage =
         {
            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .pNext = &nir_info,
            .stage = VK_SHADER_STAGE_COMPUTE_BIT,
            .pName = "main",
         },
      .layout = layout,
   };

   return vk_meta_create_compute_pipeline(&dev->vk, &dev->meta, &info, key_data,
                                          key_size, pipeline_out);
}

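/* Dispatch the clear-queries pipeline over [first_query, first_query +
 * query_count), saving and restoring the user's compute state around the
 * meta operation. */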
static void
panvk_emit_clear_queries(struct panvk_cmd_buffer *cmd,
                         struct panvk_query_pool *pool, bool available,
                         uint32_t first_query, uint32_t query_count)
{
   struct panvk_device *dev = to_panvk_device(cmd->vk.base.device);
   const struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(dev->vk.physical);
   VkResult result;

   const struct panvk_clear_query_push push = {
      .pool_addr = panvk_priv_mem_dev_addr(pool->mem),
      .available_addr = panvk_priv_mem_dev_addr(pool->available_mem),
      .query_stride = pool->query_stride,
      .first_query = first_query,
      .query_count = query_count,
      .reports_per_query = pool->reports_per_query,
      .available_value = available,
   };

   const enum panvk_meta_object_key_type key =
      PANVK_META_OBJECT_KEY_CLEAR_QUERY_POOL_PIPELINE;
   const VkPushConstantRange push_range = {
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .size = sizeof(push),
   };
   VkPipelineLayout layout;
   result = vk_meta_get_pipeline_layout(&dev->vk, &dev->meta, NULL, &push_range,
                                        &key, sizeof(key), &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd->vk, result);
      return;
   }

   VkPipeline pipeline = vk_meta_lookup_pipeline(&dev->meta, &key, sizeof(key));

   if (pipeline == VK_NULL_HANDLE) {
      result =
         get_clear_queries_pipeline(dev, &key, sizeof(key), layout, &pipeline);

      if (result != VK_SUCCESS) {
         vk_command_buffer_set_error(&cmd->vk, result);
         return;
      }
   }

   /* Save previous cmd state */
   struct panvk_cmd_meta_compute_save_ctx save = {0};
   panvk_per_arch(cmd_meta_compute_start)(cmd, &save);

   dev->vk.dispatch_table.CmdBindPipeline(panvk_cmd_buffer_to_handle(cmd),
                                          VK_PIPELINE_BIND_POINT_COMPUTE,
                                          pipeline);

   dev->vk.dispatch_table.CmdPushConstants(panvk_cmd_buffer_to_handle(cmd),
                                           layout, VK_SHADER_STAGE_COMPUTE_BIT,
                                           0, sizeof(push), &push);

   dev->vk.dispatch_table.CmdDispatchBase(
      panvk_cmd_buffer_to_handle(cmd), 0, 0, 0,
      DIV_ROUND_UP(query_count, phys_dev->kmod.props.max_threads_per_wg), 1, 1);

   /* Restore previous cmd state */
   panvk_per_arch(cmd_meta_compute_end)(cmd, &save);
}

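/* Resetting a range of queries zeroes their reports and marks them
 * unavailable (available = false). */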
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdResetQueryPool)(VkCommandBuffer commandBuffer,
                                  VkQueryPool queryPool, uint32_t firstQuery,
                                  uint32_t queryCount)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(panvk_query_pool, pool, queryPool);

   if (queryCount == 0)
      return;

   panvk_emit_clear_queries(cmd, pool, false, firstQuery, queryCount);
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdWriteTimestamp2)(VkCommandBuffer commandBuffer,
                                   VkPipelineStageFlags2 stage,
                                   VkQueryPool queryPool, uint32_t query)
{
   UNUSED VK_FROM_HANDLE(panvk_cmd_buffer, cmd, commandBuffer);
   UNUSED VK_FROM_HANDLE(panvk_query_pool, pool, queryPool);

   panvk_stub();
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdBeginQueryIndexedEXT)(VkCommandBuffer commandBuffer,
                                        VkQueryPool queryPool, uint32_t query,
                                        VkQueryControlFlags flags,
                                        uint32_t index)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(panvk_query_pool, pool, queryPool);

   /* TODO: transform feedback */
   assert(index == 0);

   bool had_batch;
   struct panvk_batch *batch = open_batch(cmd, &had_batch);
   uint64_t report_addr = panvk_query_report_dev_addr(pool, query);

   switch (pool->vk.query_type) {
   case VK_QUERY_TYPE_OCCLUSION: {
      cmd->state.gfx.occlusion_query.ptr = report_addr;
      cmd->state.gfx.occlusion_query.mode = flags & VK_QUERY_CONTROL_PRECISE_BIT
                                               ? MALI_OCCLUSION_MODE_COUNTER
                                               : MALI_OCCLUSION_MODE_PREDICATE;
      gfx_state_set_dirty(cmd, OQ);

      /* From the Vulkan spec:
       *
       *   "When an occlusion query begins, the count of passing samples
       *    always starts at zero."
       */
      for (unsigned i = 0; i < pool->reports_per_query; i++) {
         panvk_emit_write_job(
            cmd, batch, MALI_WRITE_VALUE_TYPE_IMMEDIATE_64,
            report_addr + i * sizeof(struct panvk_query_report), 0);
      }
      break;
   }
   default:
      unreachable("Unsupported query type");
   }

   close_batch(cmd, had_batch);
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdEndQueryIndexedEXT)(VkCommandBuffer commandBuffer,
                                      VkQueryPool queryPool, uint32_t query,
                                      uint32_t index)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(panvk_query_pool, pool, queryPool);

   /* TODO: transform feedback */
   assert(index == 0);

   bool end_sync = cmd->cur_batch != NULL;

   /* Close the current batch so the query results are written out and caches
    * are flushed before we mark the query available. */
   if (end_sync)
      panvk_per_arch(cmd_close_batch)(cmd);

   bool had_batch;
   struct panvk_batch *batch = open_batch(cmd, &had_batch);
   had_batch |= end_sync;

   switch (pool->vk.query_type) {
   case VK_QUERY_TYPE_OCCLUSION: {
      cmd->state.gfx.occlusion_query.ptr = 0;
      cmd->state.gfx.occlusion_query.mode = MALI_OCCLUSION_MODE_DISABLED;
      gfx_state_set_dirty(cmd, OQ);
      break;
   }
   default:
      unreachable("Unsupported query type");
   }

   uint64_t available_addr = panvk_query_available_dev_addr(pool, query);
   panvk_emit_write_job(cmd, batch, MALI_WRITE_VALUE_TYPE_IMMEDIATE_32,
                        available_addr, 1);

   close_batch(cmd, had_batch);
}

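/* Store a query result to the destination buffer, honoring the 32-bit vs
 * 64-bit result layout requested through VkQueryResultFlags. */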
static void
nir_write_query_result(nir_builder *b, nir_def *dst_addr, nir_def *idx,
                       nir_def *flags, nir_def *result)
{
   assert(result->num_components == 1);
   assert(result->bit_size == 64);

   nir_push_if(b, nir_test_mask(b, flags, VK_QUERY_RESULT_64_BIT));
   {
      nir_def *offset = nir_i2i64(b, nir_imul_imm(b, idx, 8));
      nir_store_global(b, nir_iadd(b, dst_addr, offset), 8, result, 0x1);
   }
   nir_push_else(b, NULL);
   {
      nir_def *result32 = nir_u2u32(b, result);
      nir_def *offset = nir_i2i64(b, nir_imul_imm(b, idx, 4));
      nir_store_global(b, nir_iadd(b, dst_addr, offset), 4, result32, 0x1);
   }
   nir_pop_if(b, NULL);
}

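/* An occlusion query has one report per shader core; the final sample count
 * is the sum of all per-core counters. */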
static void
nir_write_occlusion_query_result(nir_builder *b, nir_def *dst_addr,
                                 nir_def *idx, nir_def *flags,
                                 nir_def *report_addr, unsigned core_count)
{
   nir_def *value = nir_imm_int64(b, 0);

   for (unsigned core_idx = 0; core_idx < core_count; core_idx++) {
      /* Accumulate the counter written by each core */
      unsigned report_offset = core_idx * sizeof(struct panvk_query_report);

      value = nir_iadd(
         b, value,
         nir_load_global(
            b, nir_iadd(b, report_addr, nir_imm_int64(b, report_offset)), 8, 1,
            64));
   }

   nir_write_query_result(b, dst_addr, idx, flags, value);
}

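/* Push constants consumed by the copy-queries shader. The layout must match
 * the panvk_copy_query_push value filled in
 * panvk_meta_copy_query_pool_results(). */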
struct panvk_copy_query_push {
   uint64_t pool_addr;
   uint64_t available_addr;
   uint32_t query_stride;
   uint32_t first_query;
   uint32_t query_count;
   uint64_t dst_addr;
   uint64_t dst_stride;
   uint32_t flags;
};

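/* Copy one query result to the destination buffer, following the
 * availability, partial-result and with-availability semantics of
 * vkCmdCopyQueryPoolResults(). */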
static void
panvk_nir_copy_query(nir_builder *b, VkQueryType query_type,
                     unsigned core_count, nir_def *i)
{
   nir_def *pool_addr = load_info(b, struct panvk_copy_query_push, pool_addr);
   nir_def *available_addr =
      nir_u2u64(b, load_info(b, struct panvk_copy_query_push, available_addr));
   nir_def *query_stride =
      load_info(b, struct panvk_copy_query_push, query_stride);
   nir_def *first_query =
      load_info(b, struct panvk_copy_query_push, first_query);
   nir_def *dst_addr = load_info(b, struct panvk_copy_query_push, dst_addr);
   nir_def *dst_stride = load_info(b, struct panvk_copy_query_push, dst_stride);
   nir_def *flags = load_info(b, struct panvk_copy_query_push, flags);

   nir_def *query = nir_iadd(b, first_query, i);

   nir_def *avail_addr = panvk_nir_available_dev_addr(b, available_addr, query);
   nir_def *available = nir_i2b(b, nir_load_global(b, avail_addr, 4, 1, 32));

   nir_def *partial = nir_test_mask(b, flags, VK_QUERY_RESULT_PARTIAL_BIT);
   nir_def *write_results = nir_ior(b, available, partial);

   nir_def *report_addr =
      panvk_nir_query_report_dev_addr(b, pool_addr, query_stride, query);
   nir_def *dst_offset = nir_imul(b, nir_u2u64(b, i), dst_stride);

   nir_push_if(b, write_results);
   {
      switch (query_type) {
      case VK_QUERY_TYPE_OCCLUSION: {
         nir_write_occlusion_query_result(b, nir_iadd(b, dst_addr, dst_offset),
                                          nir_imm_int(b, 0), flags, report_addr,
                                          core_count);
         break;
      }
      default:
         unreachable("Unsupported query type");
      }
   }
   nir_pop_if(b, NULL);

   nir_push_if(b,
               nir_test_mask(b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));
   {
      nir_write_query_result(b, nir_iadd(b, dst_addr, dst_offset),
                             nir_imm_int(b, 1), flags, nir_b2i64(b, available));
   }
   nir_pop_if(b, NULL);
}

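/* Build the compute shader used by vkCmdCopyQueryPoolResults(): one
 * invocation per query, bounds-checked against the query count. */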
static nir_shader *
build_copy_queries_shader(VkQueryType query_type, uint32_t max_threads_per_wg,
                          unsigned core_count)
{
   nir_builder build = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, NULL,
      "panvk-meta-copy-queries(query_type=%d,core_count=%u)", query_type,
      core_count);
   nir_builder *b = &build;

   b->shader->info.workgroup_size[0] = max_threads_per_wg;
   nir_def *wg_id = nir_load_workgroup_id(b);
   nir_def *i =
      nir_iadd(b, nir_load_subgroup_invocation(b),
               nir_imul_imm(b, nir_channel(b, wg_id, 0), max_threads_per_wg));

   nir_def *query_count =
      load_info(b, struct panvk_copy_query_push, query_count);
   nir_push_if(b, nir_ilt(b, i, query_count));
   {
      panvk_nir_copy_query(b, query_type, core_count, i);
   }
   nir_pop_if(b, NULL);

   return build.shader;
}

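/* Create (and cache through vk_meta) the copy-queries compute pipeline for a
 * given query type. */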
static VkResult
get_copy_queries_pipeline(struct panvk_device *dev, VkQueryType query_type,
                          const void *key_data, size_t key_size,
                          VkPipelineLayout layout, VkPipeline *pipeline_out)
{
   const struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(dev->vk.physical);

   unsigned core_count;
   panfrost_query_core_count(&phys_dev->kmod.props, &core_count);
   const VkPipelineShaderStageNirCreateInfoMESA nir_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NIR_CREATE_INFO_MESA,
      .nir = build_copy_queries_shader(
         query_type, phys_dev->kmod.props.max_threads_per_wg, core_count),
   };
   const VkComputePipelineCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage =
         {
            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .pNext = &nir_info,
            .stage = VK_SHADER_STAGE_COMPUTE_BIT,
            .pName = "main",
         },
      .layout = layout,
   };

   return vk_meta_create_compute_pipeline(&dev->vk, &dev->meta, &info, key_data,
                                          key_size, pipeline_out);
}

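/* Dispatch the copy-queries pipeline, saving and restoring the user's compute
 * state around the meta operation. */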
static void
panvk_meta_copy_query_pool_results(struct panvk_cmd_buffer *cmd,
                                   struct panvk_query_pool *pool,
                                   uint32_t first_query, uint32_t query_count,
                                   uint64_t dst_addr, uint64_t dst_stride,
                                   VkQueryResultFlags flags)
{
   struct panvk_device *dev = to_panvk_device(cmd->vk.base.device);
   const struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(dev->vk.physical);
   VkResult result;

   const struct panvk_copy_query_push push = {
      .pool_addr = panvk_priv_mem_dev_addr(pool->mem),
      .available_addr = panvk_priv_mem_dev_addr(pool->available_mem),
      .query_stride = pool->query_stride,
      .first_query = first_query,
      .query_count = query_count,
      .dst_addr = dst_addr,
      .dst_stride = dst_stride,
      .flags = flags,
   };

   enum panvk_meta_object_key_type key;

   switch (pool->vk.query_type) {
   case VK_QUERY_TYPE_OCCLUSION: {
      key = PANVK_META_OBJECT_KEY_COPY_QUERY_POOL_RESULTS_OQ_PIPELINE;
      break;
   }
   default:
      unreachable("Unsupported query type");
   }

   const VkPushConstantRange push_range = {
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .size = sizeof(push),
   };
   VkPipelineLayout layout;
   result = vk_meta_get_pipeline_layout(&dev->vk, &dev->meta, NULL, &push_range,
                                        &key, sizeof(key), &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd->vk, result);
      return;
   }

   VkPipeline pipeline = vk_meta_lookup_pipeline(&dev->meta, &key, sizeof(key));

   if (pipeline == VK_NULL_HANDLE) {
      result = get_copy_queries_pipeline(dev, pool->vk.query_type, &key,
                                         sizeof(key), layout, &pipeline);

      if (result != VK_SUCCESS) {
         vk_command_buffer_set_error(&cmd->vk, result);
         return;
      }
   }

   /* Save previous cmd state */
   struct panvk_cmd_meta_compute_save_ctx save = {0};
   panvk_per_arch(cmd_meta_compute_start)(cmd, &save);

   dev->vk.dispatch_table.CmdBindPipeline(panvk_cmd_buffer_to_handle(cmd),
                                          VK_PIPELINE_BIND_POINT_COMPUTE,
                                          pipeline);

   dev->vk.dispatch_table.CmdPushConstants(panvk_cmd_buffer_to_handle(cmd),
                                           layout, VK_SHADER_STAGE_COMPUTE_BIT,
                                           0, sizeof(push), &push);

   dev->vk.dispatch_table.CmdDispatchBase(
      panvk_cmd_buffer_to_handle(cmd), 0, 0, 0,
      DIV_ROUND_UP(query_count, phys_dev->kmod.props.max_threads_per_wg), 1, 1);

   /* Restore previous cmd state */
   panvk_per_arch(cmd_meta_compute_end)(cmd, &save);
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdCopyQueryPoolResults)(
   VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery,
   uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset,
   VkDeviceSize stride, VkQueryResultFlags flags)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(panvk_query_pool, pool, queryPool);
   VK_FROM_HANDLE(panvk_buffer, dst_buffer, dstBuffer);

   /* XXX: Do we really need this barrier when EndQuery already handles it? */
   if ((flags & VK_QUERY_RESULT_WAIT_BIT) && cmd->cur_batch != NULL) {
      close_batch(cmd, true);
   }

   uint64_t dst_addr = panvk_buffer_gpu_ptr(dst_buffer, dstOffset);
   panvk_meta_copy_query_pool_results(cmd, pool, firstQuery, queryCount,
                                      dst_addr, stride, flags);
}