/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <vulkan/vulkan.h>

#include "hwdef/rogue_hw_utils.h"
#include "pvr_bo.h"
#include "pvr_formats.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "usc/programs/pvr_shader_factory.h"
#include "usc/programs/pvr_static_shaders.h"
#include "pvr_tex_state.h"
#include "pvr_types.h"
#include "vk_alloc.h"
#include "vk_command_pool.h"
#include "vk_util.h"

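/* Initialize the primary PDS program for a query compute kernel. The
 * flattened workgroup id is routed to register 0 and the program is marked
 * to kick the USC.
 */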
static inline void pvr_init_primary_compute_pds_program(
   struct pvr_pds_compute_shader_program *program)
{
   pvr_pds_compute_shader_program_init(program);
   program->local_input_regs[0] = 0;
   /* Workgroup id is in reg0. */
   program->work_group_input_regs[0] = 0;
   program->flattened_work_groups = true;
   program->kick_usc = true;
}

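/* Generate and upload the secondary PDS program that uploads the query
 * shader's compile-time constant buffer into its shared registers.
 */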
static VkResult pvr_create_compute_secondary_prog(
   struct pvr_device *device,
   const struct pvr_shader_factory_info *shader_factory_info,
   struct pvr_compute_query_shader *query_prog)
{
   const size_t size =
      pvr_pds_get_max_descriptor_upload_const_map_size_in_bytes();
   struct pvr_pds_descriptor_program_input sec_pds_program;
   struct pvr_pds_info *info = &query_prog->info;
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   info->entries =
      vk_alloc(&device->vk.alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!info->entries)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   info->entries_size_in_bytes = size;

   sec_pds_program = (struct pvr_pds_descriptor_program_input){
      .buffer_count = 1,
      .buffers = {
         [0] = {
            .buffer_id = 0,
            .source_offset = 0,
            .type = PVR_BUFFER_TYPE_COMPILE_TIME,
            .size_in_dwords = shader_factory_info->const_shared_regs,
            .destination = shader_factory_info->explicit_const_start_offset,
         }
      },
   };

   /* First pass with a NULL buffer only computes the required code size. */
   pvr_pds_generate_descriptor_upload_program(&sec_pds_program, NULL, info);

   staging_buffer_size = info->code_size_in_dwords;

   staging_buffer = vk_alloc(&device->vk.alloc,
                             PVR_DW_TO_BYTES(staging_buffer_size),
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer) {
      vk_free(&device->vk.alloc, info->entries);
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   /* Second pass emits the program into the staging buffer. */
   pvr_pds_generate_descriptor_upload_program(&sec_pds_program,
                                              staging_buffer,
                                              info);

   assert(info->code_size_in_dwords <= staging_buffer_size);

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               NULL,
                               0,
                               0,
                               staging_buffer,
                               info->code_size_in_dwords,
                               16,
                               16,
                               &query_prog->pds_sec_code);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, staging_buffer);
      vk_free(&device->vk.alloc, info->entries);
      return result;
   }

   vk_free(&device->vk.alloc, staging_buffer);

   return VK_SUCCESS;
}

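/* Free the secondary PDS code and the const map entries allocated in
 * pvr_create_compute_secondary_prog().
 */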
static void
pvr_destroy_compute_secondary_prog(struct pvr_device *device,
                                   struct pvr_compute_query_shader *program)
{
   pvr_bo_suballoc_free(program->pds_sec_code.pvr_bo);
   vk_free(&device->vk.alloc, program->info.entries);
}

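/* Upload the USC code of a query shader and create its primary and secondary
 * PDS programs.
 */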
static VkResult pvr_create_compute_query_program(
   struct pvr_device *device,
   const struct pvr_shader_factory_info *shader_factory_info,
   struct pvr_compute_query_shader *query_prog)
{
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   struct pvr_pds_compute_shader_program pds_primary_prog;
   VkResult result;

   /* No support for query constant calc program. */
   assert(shader_factory_info->const_calc_prog_inst_bytes == 0);
   /* No support for query coefficient update program. */
   assert(shader_factory_info->coeff_update_prog_start == PVR_INVALID_INST);

   result = pvr_gpu_upload_usc(device,
                               shader_factory_info->shader_code,
                               shader_factory_info->code_size,
                               cache_line_size,
                               &query_prog->usc_bo);
   if (result != VK_SUCCESS)
      return result;

   pvr_init_primary_compute_pds_program(&pds_primary_prog);

   pvr_pds_setup_doutu(&pds_primary_prog.usc_task_control,
                       query_prog->usc_bo->dev_addr.addr,
                       shader_factory_info->temps_required,
                       ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
                       false);

   result =
      pvr_pds_compute_shader_create_and_upload(device,
                                               &pds_primary_prog,
                                               &query_prog->pds_prim_code);
   if (result != VK_SUCCESS)
      goto err_free_usc_bo;

   query_prog->primary_data_size_dw = pds_primary_prog.data_size;
   query_prog->primary_num_temps = pds_primary_prog.temps_used;

   result = pvr_create_compute_secondary_prog(device,
                                              shader_factory_info,
                                              query_prog);
   if (result != VK_SUCCESS)
      goto err_free_pds_prim_code_bo;

   return VK_SUCCESS;

err_free_pds_prim_code_bo:
   pvr_bo_suballoc_free(query_prog->pds_prim_code.pvr_bo);

err_free_usc_bo:
   pvr_bo_suballoc_free(query_prog->usc_bo);

   return result;
}

/* TODO: See if we can dedup this with pvr_setup_descriptor_mappings(). */
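/* Allocate the PDS data section for a query program from the PDS heap and
 * fill it in by walking the const map entries emitted when the secondary
 * program was generated.
 */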
static VkResult pvr_write_compute_query_pds_data_section(
   struct pvr_cmd_buffer *cmd_buffer,
   const struct pvr_compute_query_shader *query_prog,
   struct pvr_private_compute_pipeline *pipeline)
{
   const struct pvr_pds_info *const info = &query_prog->info;
   struct pvr_suballoc_bo *pvr_bo;
   const uint8_t *entries;
   uint32_t *dword_buffer;
   uint64_t *qword_buffer;
   VkResult result;

   result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
                                     cmd_buffer->device->heaps.pds_heap,
                                     PVR_DW_TO_BYTES(info->data_size_in_dwords),
                                     &pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   dword_buffer = (uint32_t *)pvr_bo_suballoc_get_map_addr(pvr_bo);
   qword_buffer = (uint64_t *)pvr_bo_suballoc_get_map_addr(pvr_bo);

   entries = (uint8_t *)info->entries;

   /* TODO: Remove this when we can test this path and make sure that this is
    * not needed. If it's needed we should probably be using LITERAL entries
    * for this instead.
    */
   memset(dword_buffer, 0xFE, PVR_DW_TO_BYTES(info->data_size_in_dwords));

   pipeline->pds_shared_update_data_size_dw = info->data_size_in_dwords;

   for (uint32_t i = 0; i < info->entry_count; i++) {
      const struct pvr_const_map_entry *const entry_header =
         (struct pvr_const_map_entry *)entries;

      switch (entry_header->type) {
      case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32: {
         const struct pvr_const_map_entry_literal32 *const literal =
            (struct pvr_const_map_entry_literal32 *)entries;

         PVR_WRITE(dword_buffer,
                   literal->literal_value,
                   literal->const_offset,
                   info->data_size_in_dwords);

         entries += sizeof(*literal);
         break;
      }
      case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL64: {
         const struct pvr_const_map_entry_literal64 *const literal =
            (struct pvr_const_map_entry_literal64 *)entries;

         PVR_WRITE(qword_buffer,
                   literal->literal_value,
                   literal->const_offset,
                   info->data_size_in_dwords);

         entries += sizeof(*literal);
         break;
      }
      case PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS: {
         const struct pvr_const_map_entry_doutu_address *const doutu_addr =
            (struct pvr_const_map_entry_doutu_address *)entries;
         const pvr_dev_addr_t exec_addr =
            PVR_DEV_ADDR_OFFSET(query_prog->pds_sec_code.pvr_bo->dev_addr,
                                query_prog->pds_sec_code.code_offset);
         uint64_t addr = 0ULL;

         pvr_set_usc_execution_address64(&addr, exec_addr.addr);

         PVR_WRITE(qword_buffer,
                   addr | doutu_addr->doutu_control,
                   doutu_addr->const_offset,
                   info->data_size_in_dwords);

         entries += sizeof(*doutu_addr);
         break;
      }
      case PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER: {
         const struct pvr_const_map_entry_special_buffer *special_buff_entry =
            (struct pvr_const_map_entry_special_buffer *)entries;

         switch (special_buff_entry->buffer_type) {
         case PVR_BUFFER_TYPE_COMPILE_TIME: {
            uint64_t addr = pipeline->const_buffer_addr.addr;

            PVR_WRITE(qword_buffer,
                      addr,
                      special_buff_entry->const_offset,
                      info->data_size_in_dwords);
            break;
         }

         default:
            unreachable("Unsupported special buffer type.");
         }

         entries += sizeof(*special_buff_entry);
         break;
      }
      default:
         unreachable("Unsupported data section map");
      }
   }

   pipeline->pds_shared_update_data_offset =
      pvr_bo->dev_addr.addr -
      cmd_buffer->device->heaps.pds_heap->base_addr.addr;

   return VK_SUCCESS;
}

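/* Record the query program's compute dispatch into the current occlusion
 * query sub command.
 */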
static void pvr_write_private_compute_dispatch(
   struct pvr_cmd_buffer *cmd_buffer,
   struct pvr_private_compute_pipeline *pipeline,
   uint32_t num_query_indices)
{
   struct pvr_sub_cmd *sub_cmd = cmd_buffer->state.current_sub_cmd;
   /* One workgroup per 32 query indices. */
   const uint32_t workgroup_size[PVR_WORKGROUP_DIMENSIONS] = {
      DIV_ROUND_UP(num_query_indices, 32),
      1,
      1,
   };

   assert(sub_cmd->type == PVR_SUB_CMD_TYPE_OCCLUSION_QUERY);

   pvr_compute_update_shared_private(cmd_buffer, &sub_cmd->compute, pipeline);
   pvr_compute_update_kernel_private(cmd_buffer,
                                     &sub_cmd->compute,
                                     pipeline,
                                     workgroup_size);
   pvr_compute_generate_fence(cmd_buffer, &sub_cmd->compute, false);
}

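/* Free all GPU resources of a query program: the secondary program, the
 * primary PDS code and the USC code.
 */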
static void
pvr_destroy_compute_query_program(struct pvr_device *device,
                                  struct pvr_compute_query_shader *program)
{
   pvr_destroy_compute_secondary_prog(device, program);
   pvr_bo_suballoc_free(program->pds_prim_code.pvr_bo);
   pvr_bo_suballoc_free(program->usc_bo);
}

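/* Create one query program variant per core, unwinding on failure. */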
static VkResult pvr_create_multibuffer_compute_query_program(
   struct pvr_device *device,
   const struct pvr_shader_factory_info *const *shader_factory_info,
   struct pvr_compute_query_shader *query_programs)
{
   const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
   VkResult result;
   uint32_t i;

   for (i = 0; i < core_count; i++) {
      result = pvr_create_compute_query_program(device,
                                                shader_factory_info[i],
                                                &query_programs[i]);
      if (result != VK_SUCCESS)
         goto err_destroy_compute_query_program;
   }

   return VK_SUCCESS;

err_destroy_compute_query_program:
   /* Destroy only the programs created before the failure. */
   for (uint32_t j = 0; j < i; j++)
      pvr_destroy_compute_query_program(device, &query_programs[j]);

   return result;
}

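/* Create the availability write, copy query results and reset query pool
 * compute programs at device creation time.
 */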
VkResult pvr_device_create_compute_query_programs(struct pvr_device *device)
{
   const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
   VkResult result;

   result = pvr_create_compute_query_program(device,
                                             &availability_query_write_info,
                                             &device->availability_shader);
   if (result != VK_SUCCESS)
      return result;

   device->copy_results_shaders =
      vk_alloc(&device->vk.alloc,
               sizeof(*device->copy_results_shaders) * core_count,
               8,
               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device->copy_results_shaders) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_destroy_availability_query_program;
   }

   result = pvr_create_multibuffer_compute_query_program(
      device,
      copy_query_results_collection,
      device->copy_results_shaders);
   if (result != VK_SUCCESS)
      goto err_vk_free_copy_results_shaders;

   device->reset_queries_shaders =
      vk_alloc(&device->vk.alloc,
               sizeof(*device->reset_queries_shaders) * core_count,
               8,
               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device->reset_queries_shaders) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_destroy_copy_results_query_programs;
   }

   result = pvr_create_multibuffer_compute_query_program(
      device,
      reset_query_collection,
      device->reset_queries_shaders);
   if (result != VK_SUCCESS)
      goto err_vk_free_reset_queries_shaders;

   return VK_SUCCESS;

err_vk_free_reset_queries_shaders:
   vk_free(&device->vk.alloc, device->reset_queries_shaders);

err_destroy_copy_results_query_programs:
   for (uint32_t i = 0; i < core_count; i++) {
      pvr_destroy_compute_query_program(device,
                                        &device->copy_results_shaders[i]);
   }

err_vk_free_copy_results_shaders:
   vk_free(&device->vk.alloc, device->copy_results_shaders);

err_destroy_availability_query_program:
   pvr_destroy_compute_query_program(device, &device->availability_shader);

   return result;
}

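/* Destroy the query programs created by
 * pvr_device_create_compute_query_programs().
 */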
void pvr_device_destroy_compute_query_programs(struct pvr_device *device)
{
   const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;

   pvr_destroy_compute_query_program(device, &device->availability_shader);

   for (uint32_t i = 0; i < core_count; i++) {
      pvr_destroy_compute_query_program(device,
                                        &device->copy_results_shaders[i]);
      pvr_destroy_compute_query_program(device,
                                        &device->reset_queries_shaders[i]);
   }

   vk_free(&device->vk.alloc, device->copy_results_shaders);
   vk_free(&device->vk.alloc, device->reset_queries_shaders);
}

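/* Fill in texture state info describing a linear R32_UINT buffer of `width'
 * elements, to be sampled with index lookups by the query shaders.
 */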
static void pvr_init_tex_info(const struct pvr_device_info *dev_info,
                              struct pvr_texture_state_info *tex_info,
                              uint32_t width,
                              pvr_dev_addr_t addr)
{
   const VkFormat vk_format = VK_FORMAT_R32_UINT;
   const uint8_t *swizzle_arr = pvr_get_format_swizzle(vk_format);
   bool is_view_1d = !PVR_HAS_FEATURE(dev_info, tpu_extended_integer_lookup) &&
                     !PVR_HAS_FEATURE(dev_info, tpu_image_state_v2);

   *tex_info = (struct pvr_texture_state_info){
      .format = vk_format,
      .mem_layout = PVR_MEMLAYOUT_LINEAR,
      .flags = PVR_TEXFLAGS_INDEX_LOOKUP,
      .type = is_view_1d ? VK_IMAGE_VIEW_TYPE_1D : VK_IMAGE_VIEW_TYPE_2D,
      .is_cube = false,
      .tex_state_type = PVR_TEXTURE_STATE_SAMPLE,
      .extent = { .width = width, .height = 1, .depth = 0 },
      .array_size = 1,
      .base_level = 0,
      .mip_levels = 1,
      .mipmaps_present = false,
      .sample_count = 1,
      .stride = width,
      .offset = 0,
      .swizzle = { [0] = swizzle_arr[0],
                   [1] = swizzle_arr[1],
                   [2] = swizzle_arr[2],
                   [3] = swizzle_arr[3] },
      .addr = addr,
   };
}

/* TODO: Split this function into per-program-type functions. */
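/* Record a query management program (availability write, copy query results
 * or reset query pool) as an occlusion query sub command.
 */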
VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
                               const struct pvr_query_info *query_info)
{
   struct pvr_device *device = cmd_buffer->device;
   const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const struct pvr_shader_factory_info *shader_factory_info;
   uint64_t sampler_state[ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
   const struct pvr_compute_query_shader *query_prog;
   struct pvr_private_compute_pipeline pipeline;
   const uint32_t buffer_count = core_count;
   struct pvr_texture_state_info tex_info;
   uint32_t num_query_indices;
   uint32_t *const_buffer;
   struct pvr_suballoc_bo *pvr_bo;
   VkResult result;

   /* Point sampler with non-normalized coordinates, shared by all the image
    * states set up below.
    */
   pvr_csb_pack (&sampler_state[0U], TEXSTATE_SAMPLER, reg) {
      reg.addrmode_u = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
      reg.addrmode_v = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
      reg.addrmode_w = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
      reg.minfilter = ROGUE_TEXSTATE_FILTER_POINT;
      reg.magfilter = ROGUE_TEXSTATE_FILTER_POINT;
      reg.non_normalized_coords = true;
      reg.dadjust = ROGUE_TEXSTATE_DADJUST_ZERO_UINT;
   }

   /* clang-format off */
   pvr_csb_pack (&sampler_state[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {}
   /* clang-format on */

   switch (query_info->type) {
   case PVR_QUERY_TYPE_AVAILABILITY_WRITE:
      /* Adds a compute shader (fenced on the last 3D) that writes a non-zero
       * value in availability_bo at every index in index_bo.
       */
      query_prog = &device->availability_shader;
      shader_factory_info = &availability_query_write_info;
      num_query_indices = query_info->availability_write.num_query_indices;
      break;

   case PVR_QUERY_TYPE_COPY_QUERY_RESULTS:
      /* Adds a compute shader to copy availability and query value data. */
      query_prog = &device->copy_results_shaders[buffer_count - 1];
      shader_factory_info = copy_query_results_collection[buffer_count - 1];
      num_query_indices = query_info->copy_query_results.query_count;
      break;

   case PVR_QUERY_TYPE_RESET_QUERY_POOL:
      /* Adds a compute shader to reset availability and query value data. */
      query_prog = &device->reset_queries_shaders[buffer_count - 1];
      shader_factory_info = reset_query_collection[buffer_count - 1];
      num_query_indices = query_info->reset_query_pool.query_count;
      break;

   default:
      unreachable("Invalid query type");
   }

   result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer,
                                         PVR_SUB_CMD_TYPE_OCCLUSION_QUERY);
   if (result != VK_SUCCESS)
      return result;

   pipeline.pds_code_offset = query_prog->pds_prim_code.code_offset;
   pipeline.pds_data_offset = query_prog->pds_prim_code.data_offset;

   pipeline.pds_shared_update_code_offset =
      query_prog->pds_sec_code.code_offset;
   pipeline.pds_data_size_dw = query_prog->primary_data_size_dw;
   pipeline.pds_temps_used = query_prog->primary_num_temps;

   pipeline.coeff_regs_count = shader_factory_info->coeff_regs;
   pipeline.unified_store_regs_count = shader_factory_info->input_regs;
   pipeline.const_shared_regs_count = shader_factory_info->const_shared_regs;

   const_buffer =
      vk_alloc(&cmd_buffer->vk.pool->alloc,
               PVR_DW_TO_BYTES(shader_factory_info->const_shared_regs),
               8,
               VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!const_buffer) {
      return vk_command_buffer_set_error(&cmd_buffer->vk,
                                         VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   /* clang-format off */
#define DRIVER_CONST(index)                                          \
   assert(shader_factory_info->driver_const_location_map[index] <    \
          shader_factory_info->const_shared_regs);                   \
   const_buffer[shader_factory_info->driver_const_location_map[index]]
   /* clang-format on */

   switch (query_info->type) {
   case PVR_QUERY_TYPE_AVAILABILITY_WRITE: {
      uint64_t image_sampler_state[3][ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
      uint32_t image_sampler_idx = 0;

      memcpy(&image_sampler_state[image_sampler_idx][0],
             &sampler_state[0],
             sizeof(sampler_state));
      image_sampler_idx++;

      pvr_init_tex_info(dev_info,
                        &tex_info,
                        num_query_indices,
                        query_info->availability_write.index_bo->dev_addr);

      result = pvr_pack_tex_state(device,
                                  &tex_info,
                                  &image_sampler_state[image_sampler_idx][0]);
      if (result != VK_SUCCESS) {
         vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
         return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
      }

      image_sampler_idx++;

      pvr_init_tex_info(
         dev_info,
         &tex_info,
         query_info->availability_write.num_queries,
         query_info->availability_write.availability_bo->dev_addr);

      result = pvr_pack_tex_state(device,
                                  &tex_info,
                                  &image_sampler_state[image_sampler_idx][0]);
      if (result != VK_SUCCESS) {
         vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
         return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
      }

      image_sampler_idx++;

      memcpy(&const_buffer[0],
             &image_sampler_state[0][0],
             sizeof(image_sampler_state));

      /* Only PVR_QUERY_AVAILABILITY_WRITE_COUNT driver consts allowed. */
      assert(shader_factory_info->num_driver_consts ==
             PVR_QUERY_AVAILABILITY_WRITE_COUNT);

      DRIVER_CONST(PVR_QUERY_AVAILABILITY_WRITE_INDEX_COUNT) =
         num_query_indices;
      break;
   }

   case PVR_QUERY_TYPE_COPY_QUERY_RESULTS: {
      PVR_FROM_HANDLE(pvr_query_pool,
                      pool,
                      query_info->copy_query_results.query_pool);
      PVR_FROM_HANDLE(pvr_buffer,
                      buffer,
                      query_info->copy_query_results.dst_buffer);
      const uint32_t image_sampler_state_arr_size =
         (buffer_count + 2) * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS;
      uint32_t image_sampler_idx = 0;
      pvr_dev_addr_t addr;
      uint64_t offset;

      STACK_ARRAY(uint64_t, image_sampler_state, image_sampler_state_arr_size);
      if (!image_sampler_state) {
         vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);

         return vk_command_buffer_set_error(&cmd_buffer->vk,
                                            VK_ERROR_OUT_OF_HOST_MEMORY);
      }

#define SAMPLER_ARR_2D(_arr, _i, _j) \
   _arr[_i * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS + _j]

      memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0),
             &sampler_state[0],
             sizeof(sampler_state));
      image_sampler_idx++;

      offset = query_info->copy_query_results.first_query * sizeof(uint32_t);

      addr = PVR_DEV_ADDR_OFFSET(pool->availability_buffer->dev_addr, offset);

      pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);

      result = pvr_pack_tex_state(
         device,
         &tex_info,
         &SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0));
      if (result != VK_SUCCESS) {
         vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
         return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
      }

      image_sampler_idx++;

      /* One result buffer view per core. */
      for (uint32_t i = 0; i < buffer_count; i++) {
         addr = PVR_DEV_ADDR_OFFSET(pool->result_buffer->dev_addr,
                                    offset + i * pool->result_stride);

         pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);

         result = pvr_pack_tex_state(
            device,
            &tex_info,
            &SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0));
         if (result != VK_SUCCESS) {
            vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
            return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
         }

         image_sampler_idx++;
      }

      memcpy(&const_buffer[0],
             &SAMPLER_ARR_2D(image_sampler_state, 0, 0),
             image_sampler_state_arr_size * sizeof(image_sampler_state[0]));

      STACK_ARRAY_FINISH(image_sampler_state);

      /* Only PVR_COPY_QUERY_POOL_RESULTS_COUNT driver consts allowed. */
      assert(shader_factory_info->num_driver_consts ==
             PVR_COPY_QUERY_POOL_RESULTS_COUNT);

      /* Assert if no memory is bound to destination buffer. */
      assert(buffer->dev_addr.addr);

      addr = buffer->dev_addr;
      addr.addr += query_info->copy_query_results.dst_offset;
      addr.addr += query_info->copy_query_results.first_query *
                   query_info->copy_query_results.stride;

      DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_INDEX_COUNT) = num_query_indices;
      DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_LOW) = addr.addr &
                                                                   0xFFFFFFFF;
      DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_HIGH) = addr.addr >>
                                                                    32;
      DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_DEST_STRIDE) =
         query_info->copy_query_results.stride;
      DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_PARTIAL_RESULT_FLAG) =
         query_info->copy_query_results.flags & VK_QUERY_RESULT_PARTIAL_BIT;
      DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_64_BIT_FLAG) =
         query_info->copy_query_results.flags & VK_QUERY_RESULT_64_BIT;
      DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_WITH_AVAILABILITY_FLAG) =
         query_info->copy_query_results.flags &
         VK_QUERY_RESULT_WITH_AVAILABILITY_BIT;
      break;
   }

   case PVR_QUERY_TYPE_RESET_QUERY_POOL: {
      PVR_FROM_HANDLE(pvr_query_pool,
                      pool,
                      query_info->reset_query_pool.query_pool);
      const uint32_t image_sampler_state_arr_size =
         (buffer_count + 2) * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS;
      uint32_t image_sampler_idx = 0;
      pvr_dev_addr_t addr;
      uint64_t offset;

      STACK_ARRAY(uint64_t, image_sampler_state, image_sampler_state_arr_size);
      if (!image_sampler_state) {
         vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);

         return vk_command_buffer_set_error(&cmd_buffer->vk,
                                            VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0),
             &sampler_state[0],
             sizeof(sampler_state));
      image_sampler_idx++;

      offset = query_info->reset_query_pool.first_query * sizeof(uint32_t);

      /* One result buffer view per core. */
      for (uint32_t i = 0; i < buffer_count; i++) {
         addr = PVR_DEV_ADDR_OFFSET(pool->result_buffer->dev_addr,
                                    offset + i * pool->result_stride);

         pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);

         result = pvr_pack_tex_state(
            device,
            &tex_info,
            &SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0));
         if (result != VK_SUCCESS) {
            vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
            return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
         }

         image_sampler_idx++;
      }

      addr = PVR_DEV_ADDR_OFFSET(pool->availability_buffer->dev_addr, offset);

      pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);

      result = pvr_pack_tex_state(
         device,
         &tex_info,
         &SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0));
      if (result != VK_SUCCESS) {
         vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
         return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
      }

      image_sampler_idx++;

#undef SAMPLER_ARR_2D

      memcpy(&const_buffer[0],
             &image_sampler_state[0],
             image_sampler_state_arr_size * sizeof(image_sampler_state[0]));

      STACK_ARRAY_FINISH(image_sampler_state);

      /* Only PVR_RESET_QUERY_POOL_COUNT driver consts allowed. */
      assert(shader_factory_info->num_driver_consts ==
             PVR_RESET_QUERY_POOL_COUNT);

      DRIVER_CONST(PVR_RESET_QUERY_POOL_INDEX_COUNT) = num_query_indices;
      break;
   }

   default:
      unreachable("Invalid query type");
   }

#undef DRIVER_CONST

   for (uint32_t i = 0; i < shader_factory_info->num_static_const; i++) {
      const struct pvr_static_buffer *load =
         &shader_factory_info->static_const_buffer[i];

      /* Assert if static const is out of range. */
      assert(load->dst_idx < shader_factory_info->const_shared_regs);
      const_buffer[load->dst_idx] = load->value;
   }

   result = pvr_cmd_buffer_upload_general(
      cmd_buffer,
      const_buffer,
      PVR_DW_TO_BYTES(shader_factory_info->const_shared_regs),
      &pvr_bo);
   if (result != VK_SUCCESS) {
      vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);

      return result;
   }

   pipeline.const_buffer_addr = pvr_bo->dev_addr;

   vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);

   /* PDS data section for the secondary/constant upload. */
   result = pvr_write_compute_query_pds_data_section(cmd_buffer,
                                                     query_prog,
                                                     &pipeline);
   if (result != VK_SUCCESS)
      return result;

   pipeline.workgroup_size.width = ROGUE_MAX_INSTANCES_PER_TASK;
   pipeline.workgroup_size.height = 1;
   pipeline.workgroup_size.depth = 1;

   pvr_write_private_compute_dispatch(cmd_buffer, &pipeline, num_query_indices);

   return pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
}