1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31 #include "anv_measure.h"
32
33 #include "vk_util.h"
34
35 /** \file anv_cmd_buffer.c
36 *
37 * This file contains all of the stuff for emitting commands into a command
38 * buffer. This includes implementations of most of the vkCmd*
39 * entrypoints. This file is concerned entirely with state emission and
40 * not with the command buffer data structure itself. As far as this file
41 * is concerned, most of anv_cmd_buffer is magic.
42 */
43
44 static void
45 anv_cmd_state_init(struct anv_cmd_buffer *cmd_buffer)
46 {
47 struct anv_cmd_state *state = &cmd_buffer->state;
48
49 memset(state, 0, sizeof(*state));
50
51 state->current_pipeline = UINT32_MAX;
52 state->gfx.restart_index = UINT32_MAX;
53 state->gfx.object_preemption = true;
54 state->gfx.dirty = 0;
55
56 memcpy(state->gfx.dyn_state.dirty,
57 cmd_buffer->device->gfx_dirty_state,
58 sizeof(state->gfx.dyn_state.dirty));
59 }
60
61 static void
62 anv_cmd_pipeline_state_finish(struct anv_cmd_buffer *cmd_buffer,
63 struct anv_cmd_pipeline_state *pipe_state)
64 {
65 anv_push_descriptor_set_finish(&pipe_state->push_descriptor);
66 }
67
68 static void
69 anv_cmd_state_finish(struct anv_cmd_buffer *cmd_buffer)
70 {
71 struct anv_cmd_state *state = &cmd_buffer->state;
72
73 anv_cmd_pipeline_state_finish(cmd_buffer, &state->gfx.base);
74 anv_cmd_pipeline_state_finish(cmd_buffer, &state->compute.base);
75 }
76
77 static void
78 anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer)
79 {
80 anv_cmd_state_finish(cmd_buffer);
81 anv_cmd_state_init(cmd_buffer);
82 }
83
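/* Lazily allocates the companion render-engine (RCS) command buffer from the
 * device-level companion pool, under the device mutex, and begins it through
 * the per-generation cmd_buffer_begin_companion hook. Illustrative usage
 * sketch (not from the original source):
 *
 *    if (anv_cmd_buffer_ensure_rcs_companion(cmd_buffer) != VK_SUCCESS)
 *       return;
 *    // record RCS-only work into cmd_buffer->companion_rcs_cmd_buffer
 */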
84 VkResult
85 anv_cmd_buffer_ensure_rcs_companion(struct anv_cmd_buffer *cmd_buffer)
86 {
87 if (cmd_buffer->companion_rcs_cmd_buffer)
88 return VK_SUCCESS;
89
90 VkResult result = VK_SUCCESS;
91 pthread_mutex_lock(&cmd_buffer->device->mutex);
92 VK_FROM_HANDLE(vk_command_pool, pool,
93 cmd_buffer->device->companion_rcs_cmd_pool);
94 assert(pool != NULL);
95
96 struct vk_command_buffer *tmp_cmd_buffer = NULL;
97 result = pool->command_buffer_ops->create(pool, cmd_buffer->vk.level, &tmp_cmd_buffer);
98
99 if (result != VK_SUCCESS)
100 goto unlock_and_return;
101
102 cmd_buffer->companion_rcs_cmd_buffer =
103 container_of(tmp_cmd_buffer, struct anv_cmd_buffer, vk);
104 anv_genX(cmd_buffer->device->info, cmd_buffer_begin_companion)(
105 cmd_buffer->companion_rcs_cmd_buffer, cmd_buffer->vk.level);
106
107 unlock_and_return:
108 pthread_mutex_unlock(&cmd_buffer->device->mutex);
109 return result;
110 }
111
112 static VkResult
113 anv_create_cmd_buffer(struct vk_command_pool *pool,
114 VkCommandBufferLevel level,
115 struct vk_command_buffer **cmd_buffer_out)
116 {
117 struct anv_device *device =
118 container_of(pool->base.device, struct anv_device, vk);
119 struct anv_cmd_buffer *cmd_buffer;
120 VkResult result;
121
122 cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8,
123 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
124 if (cmd_buffer == NULL)
125 return vk_error(pool, VK_ERROR_OUT_OF_HOST_MEMORY);
126
127 result = vk_command_buffer_init(pool, &cmd_buffer->vk,
128 &anv_cmd_buffer_ops, level);
129 if (result != VK_SUCCESS)
130 goto fail_alloc;
131
132 cmd_buffer->vk.dynamic_graphics_state.ms.sample_locations =
133 &cmd_buffer->state.gfx.sample_locations;
134 cmd_buffer->vk.dynamic_graphics_state.vi =
135 &cmd_buffer->state.gfx.vertex_input;
136
137 cmd_buffer->batch.status = VK_SUCCESS;
138 cmd_buffer->generation.batch.status = VK_SUCCESS;
139
140 cmd_buffer->device = device;
141
142 assert(pool->queue_family_index < device->physical->queue.family_count);
143 cmd_buffer->queue_family =
144 &device->physical->queue.families[pool->queue_family_index];
145
146 result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer);
147 if (result != VK_SUCCESS)
148 goto fail_vk;
149
150 anv_state_stream_init(&cmd_buffer->surface_state_stream,
151 &device->internal_surface_state_pool, 4096);
152 anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
153 &device->dynamic_state_pool, 16384);
154 anv_state_stream_init(&cmd_buffer->general_state_stream,
155 &device->general_state_pool, 16384);
156 anv_state_stream_init(&cmd_buffer->indirect_push_descriptor_stream,
157 &device->indirect_push_descriptor_pool, 4096);
158 anv_state_stream_init(&cmd_buffer->push_descriptor_buffer_stream,
159 &device->push_descriptor_buffer_pool, 4096);
160
161 int success = u_vector_init_pow2(&cmd_buffer->dynamic_bos, 8,
162 sizeof(struct anv_bo *));
163 if (!success)
164 goto fail_batch_bo;
165
166 cmd_buffer->self_mod_locations = NULL;
167 cmd_buffer->companion_rcs_cmd_buffer = NULL;
168 cmd_buffer->is_companion_rcs_cmd_buffer = false;
169
170 cmd_buffer->generation.jump_addr = ANV_NULL_ADDRESS;
171 cmd_buffer->generation.return_addr = ANV_NULL_ADDRESS;
172
173 memset(&cmd_buffer->generation.shader_state, 0,
174 sizeof(cmd_buffer->generation.shader_state));
175
176 anv_cmd_state_init(cmd_buffer);
177
178 anv_measure_init(cmd_buffer);
179
180 u_trace_init(&cmd_buffer->trace, &device->ds.trace_context);
181
182 *cmd_buffer_out = &cmd_buffer->vk;
183
184 return VK_SUCCESS;
185
186 fail_batch_bo:
187 anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);
188 fail_vk:
189 vk_command_buffer_finish(&cmd_buffer->vk);
190 fail_alloc:
191 vk_free2(&device->vk.alloc, &pool->alloc, cmd_buffer);
192
193 return result;
194 }
195
196 static void
197 destroy_cmd_buffer(struct anv_cmd_buffer *cmd_buffer)
198 {
199 u_trace_fini(&cmd_buffer->trace);
200
201 anv_measure_destroy(cmd_buffer);
202
203 anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);
204
205 anv_state_stream_finish(&cmd_buffer->surface_state_stream);
206 anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
207 anv_state_stream_finish(&cmd_buffer->general_state_stream);
208 anv_state_stream_finish(&cmd_buffer->indirect_push_descriptor_stream);
209 anv_state_stream_finish(&cmd_buffer->push_descriptor_buffer_stream);
210
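   /* Dynamic BOs go back to the pool they were allocated from; the map check
    * below is what tells the two pools apart (batch-pool BOs are CPU-mapped,
    * BVH-pool BOs are not).
    */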
211 while (u_vector_length(&cmd_buffer->dynamic_bos) > 0) {
212 struct anv_bo **bo = u_vector_remove(&cmd_buffer->dynamic_bos);
213 anv_bo_pool_free((*bo)->map != NULL ?
214 &cmd_buffer->device->batch_bo_pool :
215 &cmd_buffer->device->bvh_bo_pool, *bo);
216 }
217 u_vector_finish(&cmd_buffer->dynamic_bos);
218
219 anv_cmd_state_finish(cmd_buffer);
220
221 vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer->self_mod_locations);
222
223 vk_command_buffer_finish(&cmd_buffer->vk);
224 vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer);
225 }
226
227 static void
228 anv_cmd_buffer_destroy(struct vk_command_buffer *vk_cmd_buffer)
229 {
230 struct anv_cmd_buffer *cmd_buffer =
231 container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
232 struct anv_device *device = cmd_buffer->device;
233
234 pthread_mutex_lock(&device->mutex);
235 if (cmd_buffer->companion_rcs_cmd_buffer) {
236 destroy_cmd_buffer(cmd_buffer->companion_rcs_cmd_buffer);
237 cmd_buffer->companion_rcs_cmd_buffer = NULL;
238 }
239
240 ANV_RMV(cmd_buffer_destroy, cmd_buffer->device, cmd_buffer);
241
242 destroy_cmd_buffer(cmd_buffer);
243 pthread_mutex_unlock(&device->mutex);
244 }
245
246 static void
247 reset_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
248 UNUSED VkCommandBufferResetFlags flags)
249 {
250 vk_command_buffer_reset(&cmd_buffer->vk);
251
252 cmd_buffer->usage_flags = 0;
253 cmd_buffer->perf_query_pool = NULL;
254 cmd_buffer->is_companion_rcs_cmd_buffer = false;
255 anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer);
256 anv_cmd_state_reset(cmd_buffer);
257
258 memset(&cmd_buffer->generation.shader_state, 0,
259 sizeof(cmd_buffer->generation.shader_state));
260
261 cmd_buffer->generation.jump_addr = ANV_NULL_ADDRESS;
262 cmd_buffer->generation.return_addr = ANV_NULL_ADDRESS;
263
264 anv_state_stream_finish(&cmd_buffer->surface_state_stream);
265 anv_state_stream_init(&cmd_buffer->surface_state_stream,
266 &cmd_buffer->device->internal_surface_state_pool, 4096);
267
268 anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
269 anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
270 &cmd_buffer->device->dynamic_state_pool, 16384);
271
272 anv_state_stream_finish(&cmd_buffer->general_state_stream);
273 anv_state_stream_init(&cmd_buffer->general_state_stream,
274 &cmd_buffer->device->general_state_pool, 16384);
275
276 anv_state_stream_finish(&cmd_buffer->indirect_push_descriptor_stream);
277 anv_state_stream_init(&cmd_buffer->indirect_push_descriptor_stream,
278 &cmd_buffer->device->indirect_push_descriptor_pool,
279 4096);
280
281 anv_state_stream_finish(&cmd_buffer->push_descriptor_buffer_stream);
282 anv_state_stream_init(&cmd_buffer->push_descriptor_buffer_stream,
283 &cmd_buffer->device->push_descriptor_buffer_pool, 4096);
284
285 while (u_vector_length(&cmd_buffer->dynamic_bos) > 0) {
286 struct anv_bo **bo = u_vector_remove(&cmd_buffer->dynamic_bos);
287 anv_device_release_bo(cmd_buffer->device, *bo);
288 }
289
290 anv_measure_reset(cmd_buffer);
291
292 u_trace_fini(&cmd_buffer->trace);
293 u_trace_init(&cmd_buffer->trace, &cmd_buffer->device->ds.trace_context);
294 }
295
296 void
297 anv_cmd_buffer_reset(struct vk_command_buffer *vk_cmd_buffer,
298 UNUSED VkCommandBufferResetFlags flags)
299 {
300 struct anv_cmd_buffer *cmd_buffer =
301 container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
302
303 if (cmd_buffer->companion_rcs_cmd_buffer) {
304 reset_cmd_buffer(cmd_buffer->companion_rcs_cmd_buffer, flags);
305 destroy_cmd_buffer(cmd_buffer->companion_rcs_cmd_buffer);
306 cmd_buffer->companion_rcs_cmd_buffer = NULL;
307 }
308
309 ANV_RMV(cmd_buffer_destroy, cmd_buffer->device, cmd_buffer);
310
311 reset_cmd_buffer(cmd_buffer, flags);
312 }
313
314 const struct vk_command_buffer_ops anv_cmd_buffer_ops = {
315 .create = anv_create_cmd_buffer,
316 .reset = anv_cmd_buffer_reset,
317 .destroy = anv_cmd_buffer_destroy,
318 };
319
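/* The wrappers below dispatch to the hardware-generation specific
 * implementation selected by anv_genX(); a sketch of the pattern used
 * throughout this file:
 *
 *    anv_genX(devinfo, cmd_buffer_emit_bt_pool_base_address)(cmd_buffer);
 */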
320 void
321 anv_cmd_buffer_emit_bt_pool_base_address(struct anv_cmd_buffer *cmd_buffer)
322 {
323 const struct intel_device_info *devinfo = cmd_buffer->device->info;
324 anv_genX(devinfo, cmd_buffer_emit_bt_pool_base_address)(cmd_buffer);
325 }
326
327 void
328 anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
329 const struct anv_image *image,
330 VkImageAspectFlagBits aspect,
331 enum isl_aux_usage aux_usage,
332 uint32_t level,
333 uint32_t base_layer,
334 uint32_t layer_count)
335 {
336 const struct intel_device_info *devinfo = cmd_buffer->device->info;
337 anv_genX(devinfo, cmd_buffer_mark_image_written)(cmd_buffer, image,
338 aspect, aux_usage,
339 level, base_layer,
340 layer_count);
341 }
342
343 void
344 anv_cmd_buffer_mark_image_fast_cleared(struct anv_cmd_buffer *cmd_buffer,
345 const struct anv_image *image,
346 const enum isl_format format,
347 const struct isl_swizzle swizzle,
348 union isl_color_value clear_color)
349 {
350 const struct intel_device_info *devinfo = cmd_buffer->device->info;
351 anv_genX(devinfo, set_fast_clear_state)(cmd_buffer, image, format, swizzle,
352 clear_color);
353 }
354
355 void
356 anv_cmd_buffer_load_clear_color(struct anv_cmd_buffer *cmd_buffer,
357 struct anv_state state,
358 const struct anv_image_view *iview)
359 {
360 const struct intel_device_info *devinfo = cmd_buffer->device->info;
361 anv_genX(devinfo, cmd_buffer_load_clear_color)(cmd_buffer, state, iview);
362 }
363
364 void
365 anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer)
366 {
367 const struct intel_device_info *devinfo = cmd_buffer->device->info;
368 anv_genX(devinfo, cmd_emit_conditional_render_predicate)(cmd_buffer);
369 }
370
371 static void
372 clear_pending_query_bits(enum anv_query_bits *query_bits,
373 enum anv_pipe_bits flushed_bits)
374 {
375 if (flushed_bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
376 *query_bits &= ~ANV_QUERY_WRITES_RT_FLUSH;
377
378 if (flushed_bits & ANV_PIPE_TILE_CACHE_FLUSH_BIT)
379 *query_bits &= ~ANV_QUERY_WRITES_TILE_FLUSH;
380
381 if ((flushed_bits & ANV_PIPE_DATA_CACHE_FLUSH_BIT) &&
382 (flushed_bits & ANV_PIPE_HDC_PIPELINE_FLUSH_BIT) &&
383 (flushed_bits & ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT))
384 *query_bits &= ~ANV_QUERY_WRITES_TILE_FLUSH;
385
386 /* Once RT/TILE have been flushed, we can consider the CS_STALL flush */
387 if ((*query_bits & (ANV_QUERY_WRITES_TILE_FLUSH |
388 ANV_QUERY_WRITES_RT_FLUSH |
389 ANV_QUERY_WRITES_DATA_FLUSH)) == 0 &&
390 (flushed_bits & (ANV_PIPE_END_OF_PIPE_SYNC_BIT | ANV_PIPE_CS_STALL_BIT)))
391 *query_bits &= ~ANV_QUERY_WRITES_CS_STALL;
392 }
393
394 void
395 anv_cmd_buffer_update_pending_query_bits(struct anv_cmd_buffer *cmd_buffer,
396 enum anv_pipe_bits flushed_bits)
397 {
398 clear_pending_query_bits(&cmd_buffer->state.queries.clear_bits, flushed_bits);
399 clear_pending_query_bits(&cmd_buffer->state.queries.buffer_write_bits, flushed_bits);
400 }
401
402 static bool
403 mem_update(void *dst, const void *src, size_t size)
404 {
405 if (memcmp(dst, src, size) == 0)
406 return false;
407
408 memcpy(dst, src, size);
409 return true;
410 }
411
412 static void
413 set_dirty_for_bind_map(struct anv_cmd_buffer *cmd_buffer,
414 gl_shader_stage stage,
415 const struct anv_pipeline_bind_map *map)
416 {
417 assert(stage < ARRAY_SIZE(cmd_buffer->state.surface_sha1s));
418 if (mem_update(cmd_buffer->state.surface_sha1s[stage],
419 map->surface_sha1, sizeof(map->surface_sha1)))
420 cmd_buffer->state.descriptors_dirty |= mesa_to_vk_shader_stage(stage);
421
422 assert(stage < ARRAY_SIZE(cmd_buffer->state.sampler_sha1s));
423 if (mem_update(cmd_buffer->state.sampler_sha1s[stage],
424 map->sampler_sha1, sizeof(map->sampler_sha1)))
425 cmd_buffer->state.descriptors_dirty |= mesa_to_vk_shader_stage(stage);
426
427 assert(stage < ARRAY_SIZE(cmd_buffer->state.push_sha1s));
428 if (mem_update(cmd_buffer->state.push_sha1s[stage],
429 map->push_sha1, sizeof(map->push_sha1)))
430 cmd_buffer->state.push_constants_dirty |= mesa_to_vk_shader_stage(stage);
431 }
432
433 static void
434 anv_cmd_buffer_set_ray_query_buffer(struct anv_cmd_buffer *cmd_buffer,
435 struct anv_cmd_pipeline_state *pipeline_state,
436 struct anv_pipeline *pipeline,
437 VkShaderStageFlags stages)
438 {
439 struct anv_device *device = cmd_buffer->device;
440 uint8_t idx = anv_get_ray_query_bo_index(cmd_buffer);
441
442 uint64_t ray_shadow_size =
443 align64(brw_rt_ray_queries_shadow_stacks_size(device->info,
444 pipeline->ray_queries),
445 4096);
446 if (ray_shadow_size > 0 &&
447 (!cmd_buffer->state.ray_query_shadow_bo ||
448 cmd_buffer->state.ray_query_shadow_bo->size < ray_shadow_size)) {
449 unsigned shadow_size_log2 = MAX2(util_logbase2_ceil(ray_shadow_size), 16);
450 unsigned bucket = shadow_size_log2 - 16;
451 assert(bucket < ARRAY_SIZE(device->ray_query_shadow_bos[0]));
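      /* Worked example (illustrative): a 1 MiB shadow requirement gives
       * shadow_size_log2 = 20 and bucket = 4, i.e. shadow BOs are cached in
       * power-of-two size classes starting at 64 KiB (bucket 0).
       */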
452
453 struct anv_bo *bo = p_atomic_read(&device->ray_query_shadow_bos[idx][bucket]);
454 if (bo == NULL) {
455 struct anv_bo *new_bo;
456 VkResult result = anv_device_alloc_bo(device, "RT queries shadow",
457 ray_shadow_size,
458 ANV_BO_ALLOC_INTERNAL, /* alloc_flags */
459 0, /* explicit_address */
460 &new_bo);
461 if (result != VK_SUCCESS) {
462 anv_batch_set_error(&cmd_buffer->batch, result);
463 return;
464 }
465
466 bo = p_atomic_cmpxchg(&device->ray_query_shadow_bos[idx][bucket], NULL, new_bo);
467 if (bo != NULL) {
468 anv_device_release_bo(device, new_bo);
469 } else {
470 bo = new_bo;
471 }
472 }
473 cmd_buffer->state.ray_query_shadow_bo = bo;
474
475 /* Add the ray query buffers to the batch list. */
476 anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
477 cmd_buffer->state.ray_query_shadow_bo);
478 }
479
480 /* Add the HW buffer to the list of BOs used. */
481 assert(device->ray_query_bo[idx]);
482 anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
483 device->ray_query_bo[idx]);
484
485 /* Fill the push constants & mark them dirty. */
486 struct anv_address ray_query_globals_addr =
487 anv_genX(device->info, cmd_buffer_ray_query_globals)(cmd_buffer);
488 pipeline_state->push_constants.ray_query_globals =
489 anv_address_physical(ray_query_globals_addr);
490 cmd_buffer->state.push_constants_dirty |= stages;
491 pipeline_state->push_constants_data_dirty = true;
492 }
493
494 /**
495  * This function computes the changes between two pipelines and flags the
496  * dirty HW state appropriately.
497 */
498 static void
499 anv_cmd_buffer_flush_pipeline_state(struct anv_cmd_buffer *cmd_buffer,
500 struct anv_graphics_pipeline *old_pipeline,
501 struct anv_graphics_pipeline *new_pipeline)
502 {
503 struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
504 struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
505
506 #define diff_fix_state(bit, name) \
507 do { \
508 /* Fixed states should always have matching sizes */ \
509 assert(old_pipeline == NULL || \
510 old_pipeline->name.len == new_pipeline->name.len); \
511 /* Don't bother memcmp if the state is already dirty */ \
512 if (!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_##bit) && \
513 (old_pipeline == NULL || \
514 memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
515 &new_pipeline->batch_data[new_pipeline->name.offset], \
516 4 * new_pipeline->name.len) != 0)) \
517 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \
518 } while (0)
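/* For example (illustrative expansion, assert elided), diff_fix_state(URB,
 * final.urb) behaves roughly as:
 *
 *    if (!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_URB) &&
 *        (old_pipeline == NULL ||
 *         memcmp(&old_pipeline->batch_data[old_pipeline->final.urb.offset],
 *                &new_pipeline->batch_data[new_pipeline->final.urb.offset],
 *                4 * new_pipeline->final.urb.len) != 0))
 *       BITSET_SET(hw_state->dirty, ANV_GFX_STATE_URB);
 */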
519 #define diff_var_state(bit, name) \
520 do { \
521 /* Don't bother memcmp if the state is already dirty */ \
522 /* Also if the new state is empty, avoid marking dirty */ \
523 if (!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_##bit) && \
524 new_pipeline->name.len != 0 && \
525 (old_pipeline == NULL || \
526 old_pipeline->name.len != new_pipeline->name.len || \
527 memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
528 &new_pipeline->batch_data[new_pipeline->name.offset], \
529 4 * new_pipeline->name.len) != 0)) \
530 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \
531 } while (0)
532 #define assert_identical(bit, name) \
533 do { \
534 /* Fixed states should always have matching sizes */ \
535 assert(old_pipeline == NULL || \
536 old_pipeline->name.len == new_pipeline->name.len); \
537 assert(old_pipeline == NULL || \
538 memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
539 &new_pipeline->batch_data[new_pipeline->name.offset], \
540 4 * new_pipeline->name.len) == 0); \
541 } while (0)
542 #define assert_empty(name) assert(new_pipeline->name.len == 0)
543
544 /* Compare all states, including partially packed ones; the dynamic part
545  * is left at 0 but the static part could still change.
546  *
547  * We avoid comparing protected packets as all the fields but the scratch
548  * surface are identical; we just need to select the right one at emission.
549 */
550 diff_fix_state(URB, final.urb);
551 diff_fix_state(VF_SGVS, final.vf_sgvs);
552 if (cmd_buffer->device->info->ver >= 11)
553 diff_fix_state(VF_SGVS_2, final.vf_sgvs_2);
554 if (cmd_buffer->device->info->ver >= 12)
555 diff_fix_state(PRIMITIVE_REPLICATION, final.primitive_replication);
556 diff_fix_state(SBE, final.sbe);
557 diff_fix_state(SBE_SWIZ, final.sbe_swiz);
558 diff_fix_state(VS, final.vs);
559 diff_fix_state(HS, final.hs);
560 diff_fix_state(DS, final.ds);
561
562 diff_fix_state(CLIP, partial.clip);
563 diff_fix_state(SF, partial.sf);
564 diff_fix_state(WM, partial.wm);
565 diff_fix_state(STREAMOUT, partial.so);
566 diff_fix_state(GS, partial.gs);
567 diff_fix_state(TE, partial.te);
568 diff_fix_state(VFG, partial.vfg);
569 diff_fix_state(PS, partial.ps);
570 diff_fix_state(PS_EXTRA, partial.ps_extra);
571
572 if (cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader) {
573 diff_fix_state(TASK_CONTROL, final.task_control);
574 diff_fix_state(TASK_SHADER, final.task_shader);
575 diff_fix_state(TASK_REDISTRIB, final.task_redistrib);
576 diff_fix_state(MESH_CONTROL, final.mesh_control);
577 diff_fix_state(MESH_SHADER, final.mesh_shader);
578 diff_fix_state(MESH_DISTRIB, final.mesh_distrib);
579 diff_fix_state(CLIP_MESH, final.clip_mesh);
580 diff_fix_state(SBE_MESH, final.sbe_mesh);
581 } else {
582 assert_empty(final.task_control);
583 assert_empty(final.task_shader);
584 assert_empty(final.task_redistrib);
585 assert_empty(final.mesh_control);
586 assert_empty(final.mesh_shader);
587 assert_empty(final.mesh_distrib);
588 assert_empty(final.clip_mesh);
589 assert_empty(final.sbe_mesh);
590 }
591
592 /* States that can vary in length */
593 diff_var_state(VF_SGVS_INSTANCING, final.vf_sgvs_instancing);
594 diff_var_state(SO_DECL_LIST, final.so_decl_list);
595
596 #undef diff_fix_state
597 #undef diff_var_state
598 #undef assert_identical
599 #undef assert_empty
600
601 /* We're not diffing the following:
602 * - anv_graphics_pipeline::vertex_input_data
603 * - anv_graphics_pipeline::final::vf_instancing
604 *
605 * since they are tracked by the runtime.
606 */
607 }
608
609 void anv_CmdBindPipeline(
610 VkCommandBuffer commandBuffer,
611 VkPipelineBindPoint pipelineBindPoint,
612 VkPipeline _pipeline)
613 {
614 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
615 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
616 struct anv_cmd_pipeline_state *state;
617 VkShaderStageFlags stages = 0;
618
619 switch (pipelineBindPoint) {
620 case VK_PIPELINE_BIND_POINT_COMPUTE: {
621 if (cmd_buffer->state.compute.base.pipeline == pipeline)
622 return;
623
624 cmd_buffer->state.compute.base.pipeline = pipeline;
625 cmd_buffer->state.compute.pipeline_dirty = true;
626
627 struct anv_compute_pipeline *compute_pipeline =
628 anv_pipeline_to_compute(pipeline);
629 set_dirty_for_bind_map(cmd_buffer, MESA_SHADER_COMPUTE,
630 &compute_pipeline->cs->bind_map);
631
632 state = &cmd_buffer->state.compute.base;
633 stages = VK_SHADER_STAGE_COMPUTE_BIT;
634 break;
635 }
636
637 case VK_PIPELINE_BIND_POINT_GRAPHICS: {
638 struct anv_graphics_pipeline *new_pipeline =
639 anv_pipeline_to_graphics(pipeline);
640
641 /* Apply the non-dynamic state from the pipeline */
642 vk_cmd_set_dynamic_graphics_state(&cmd_buffer->vk,
643 &new_pipeline->dynamic_state);
644
645 if (cmd_buffer->state.gfx.base.pipeline == pipeline)
646 return;
647
648 struct anv_graphics_pipeline *old_pipeline =
649 cmd_buffer->state.gfx.base.pipeline == NULL ? NULL :
650 anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
651
652 cmd_buffer->state.gfx.base.pipeline = pipeline;
653 cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE;
654
655 anv_foreach_stage(stage, new_pipeline->base.base.active_stages) {
656 set_dirty_for_bind_map(cmd_buffer, stage,
657 &new_pipeline->base.shaders[stage]->bind_map);
658 }
659
660 state = &cmd_buffer->state.gfx.base;
661 stages = new_pipeline->base.base.active_stages;
662
663
664 /* When the pipeline is using independent states and dynamic buffers,
665 * this will trigger an update of anv_push_constants::dynamic_base_index
666 * & anv_push_constants::dynamic_offsets.
667 */
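      /* anv_push_constants::desc_surface_offsets[] packs two fields per set:
       * the surface offset (ANV_DESCRIPTOR_SET_OFFSET_MASK bits, see
       * anv_cmd_buffer_bind_descriptor_set below) and the dynamic-offset base
       * index (ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK bits); only the index
       * part is rewritten here.
       */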
668 struct anv_push_constants *push =
669 &cmd_buffer->state.gfx.base.push_constants;
670 struct anv_pipeline_sets_layout *layout = &new_pipeline->base.base.layout;
671 if (layout->independent_sets && layout->num_dynamic_buffers > 0) {
672 bool modified = false;
673 for (uint32_t s = 0; s < layout->num_sets; s++) {
674 if (layout->set[s].layout == NULL)
675 continue;
676
677 assert(layout->set[s].dynamic_offset_start < MAX_DYNAMIC_BUFFERS);
678 if (layout->set[s].layout->dynamic_offset_count > 0 &&
679 (push->desc_surface_offsets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) !=
680 layout->set[s].dynamic_offset_start) {
681 push->desc_surface_offsets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
682 push->desc_surface_offsets[s] |= (layout->set[s].dynamic_offset_start &
683 ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
684 modified = true;
685 }
686 }
687 if (modified) {
688 cmd_buffer->state.push_constants_dirty |= stages;
689 state->push_constants_data_dirty = true;
690 }
691 }
692
693 anv_cmd_buffer_flush_pipeline_state(cmd_buffer, old_pipeline, new_pipeline);
694 break;
695 }
696
697 case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: {
698 if (cmd_buffer->state.rt.base.pipeline == pipeline)
699 return;
700
701 cmd_buffer->state.rt.base.pipeline = pipeline;
702 cmd_buffer->state.rt.pipeline_dirty = true;
703
704 struct anv_ray_tracing_pipeline *rt_pipeline =
705 anv_pipeline_to_ray_tracing(pipeline);
706 if (rt_pipeline->stack_size > 0) {
707 anv_CmdSetRayTracingPipelineStackSizeKHR(commandBuffer,
708 rt_pipeline->stack_size);
709 }
710
711 state = &cmd_buffer->state.rt.base;
712 break;
713 }
714
715 default:
716 unreachable("invalid bind point");
717 break;
718 }
719
720 if (pipeline->ray_queries > 0)
721 anv_cmd_buffer_set_ray_query_buffer(cmd_buffer, state, pipeline, stages);
722 }
723
724 static struct anv_cmd_pipeline_state *
725 anv_cmd_buffer_get_pipeline_layout_state(struct anv_cmd_buffer *cmd_buffer,
726 VkPipelineBindPoint bind_point,
727 const struct anv_descriptor_set_layout *set_layout,
728 VkShaderStageFlags *out_stages)
729 {
730 *out_stages = set_layout->shader_stages;
731
732 switch (bind_point) {
733 case VK_PIPELINE_BIND_POINT_GRAPHICS:
734 *out_stages &= VK_SHADER_STAGE_ALL_GRAPHICS |
735 (cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader ?
736 (VK_SHADER_STAGE_TASK_BIT_EXT |
737 VK_SHADER_STAGE_MESH_BIT_EXT) : 0);
738 return &cmd_buffer->state.gfx.base;
739
740 case VK_PIPELINE_BIND_POINT_COMPUTE:
741 *out_stages &= VK_SHADER_STAGE_COMPUTE_BIT;
742 return &cmd_buffer->state.compute.base;
743
744 case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
745 *out_stages &= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
746 VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
747 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
748 VK_SHADER_STAGE_MISS_BIT_KHR |
749 VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
750 VK_SHADER_STAGE_CALLABLE_BIT_KHR;
751 return &cmd_buffer->state.rt.base;
752
753 default:
754 unreachable("invalid bind point");
755 }
756 }
757
758 static void
759 anv_cmd_buffer_maybe_dirty_descriptor_mode(struct anv_cmd_buffer *cmd_buffer,
760 enum anv_cmd_descriptor_buffer_mode new_mode)
761 {
762 if (cmd_buffer->state.current_db_mode == new_mode)
763 return;
764
765 /* Ensure we program the STATE_BASE_ADDRESS properly at least once */
766 cmd_buffer->state.descriptor_buffers.dirty = true;
767 cmd_buffer->state.pending_db_mode = new_mode;
768 }
769
770 static void
771 anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
772 VkPipelineBindPoint bind_point,
773 struct anv_pipeline_sets_layout *layout,
774 uint32_t set_index,
775 struct anv_descriptor_set *set,
776 uint32_t *dynamic_offset_count,
777 const uint32_t **dynamic_offsets)
778 {
779 /* Either we have no pool because it's a push descriptor or the pool is not
780  * host only:
781 *
782 * VUID-vkCmdBindDescriptorSets-pDescriptorSets-04616:
783 *
784 * "Each element of pDescriptorSets must not have been allocated from a
785 * VkDescriptorPool with the
786 * VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT flag set"
787 */
788 assert(!set->pool || !set->pool->host_only);
789
790 struct anv_descriptor_set_layout *set_layout =
791 layout ? layout->set[set_index].layout : set->layout;
792
793 anv_cmd_buffer_maybe_dirty_descriptor_mode(
794 cmd_buffer,
795 (set->layout->flags &
796 VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT) != 0 ?
797 ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER :
798 ANV_CMD_DESCRIPTOR_BUFFER_MODE_LEGACY);
799
800 VkShaderStageFlags stages;
801 struct anv_cmd_pipeline_state *pipe_state =
802 anv_cmd_buffer_get_pipeline_layout_state(cmd_buffer, bind_point,
803 set_layout, &stages);
804
805 VkShaderStageFlags dirty_stages = 0;
806 /* If it's a push descriptor set, we have to flag things as dirty
807 * regardless of whether or not the CPU-side data structure changed as we
808 * may have edited in-place.
809 */
810 if (pipe_state->descriptors[set_index] != set ||
811 anv_descriptor_set_is_push(set)) {
812 pipe_state->descriptors[set_index] = set;
813
814 if (set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT) {
815 assert(set->is_push);
816
817 pipe_state->descriptor_buffers[set_index].buffer_index = -1;
818 pipe_state->descriptor_buffers[set_index].buffer_offset = set->desc_offset;
819 pipe_state->descriptor_buffers[set_index].bound = true;
820 cmd_buffer->state.descriptors_dirty |= stages;
821 cmd_buffer->state.descriptor_buffers.offsets_dirty |= stages;
822 } else {
823 /* When using indirect descriptors, stages that have access to the HW
824  * binding tables never need to access the
825 * anv_push_constants::desc_offsets fields, because any data they
826 * need from the descriptor buffer is accessible through a binding
827 * table entry. For stages that are "bindless" (Mesh/Task/RT), we
828 * need to provide anv_push_constants::desc_offsets matching the
829 * bound descriptor so that shaders can access the descriptor buffer
830 * through A64 messages.
831 *
832 * With direct descriptors, the shaders can use the
833  * anv_push_constants::desc_offsets to build bindless offsets, so
834  * we always need to update the push constant data.
835 */
836 bool update_desc_sets =
837 !cmd_buffer->device->physical->indirect_descriptors ||
838 (stages & (VK_SHADER_STAGE_TASK_BIT_EXT |
839 VK_SHADER_STAGE_MESH_BIT_EXT |
840 VK_SHADER_STAGE_RAYGEN_BIT_KHR |
841 VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
842 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
843 VK_SHADER_STAGE_MISS_BIT_KHR |
844 VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
845 VK_SHADER_STAGE_CALLABLE_BIT_KHR));
846
847 if (update_desc_sets) {
848 struct anv_push_constants *push = &pipe_state->push_constants;
849 uint64_t offset =
850 anv_address_physical(set->desc_surface_addr) -
851 cmd_buffer->device->physical->va.internal_surface_state_pool.addr;
852 assert((offset & ~ANV_DESCRIPTOR_SET_OFFSET_MASK) == 0);
853 push->desc_surface_offsets[set_index] &= ~ANV_DESCRIPTOR_SET_OFFSET_MASK;
854 push->desc_surface_offsets[set_index] |= offset;
855 push->desc_sampler_offsets[set_index] =
856 anv_address_physical(set->desc_sampler_addr) -
857 cmd_buffer->device->physical->va.dynamic_state_pool.addr;
858
859 anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
860 set->desc_surface_addr.bo);
861 anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
862 set->desc_sampler_addr.bo);
863 }
864 }
865
866 dirty_stages |= stages;
867 }
868
869 if (dynamic_offsets) {
870 if (set_layout->dynamic_offset_count > 0) {
871 struct anv_push_constants *push = &pipe_state->push_constants;
872 assert(layout != NULL);
873 uint32_t dynamic_offset_start =
874 layout->set[set_index].dynamic_offset_start;
875 uint32_t *push_offsets =
876 &push->dynamic_offsets[dynamic_offset_start];
877
878 memcpy(pipe_state->dynamic_offsets[set_index].offsets,
879 *dynamic_offsets,
880 sizeof(uint32_t) * MIN2(*dynamic_offset_count,
881 set_layout->dynamic_offset_count));
882
883 /* Assert that everything is in range */
884 assert(set_layout->dynamic_offset_count <= *dynamic_offset_count);
885 assert(dynamic_offset_start + set_layout->dynamic_offset_count <=
886 ARRAY_SIZE(push->dynamic_offsets));
887
888 for (uint32_t i = 0; i < set_layout->dynamic_offset_count; i++) {
889 if (push_offsets[i] != (*dynamic_offsets)[i]) {
890 pipe_state->dynamic_offsets[set_index].offsets[i] =
891 push_offsets[i] = (*dynamic_offsets)[i];
892 /* dynamic_offset_stages[] elements could contain blanket
893 * values like VK_SHADER_STAGE_ALL, so limit this to the
894 * binding point's bits.
895 */
896 dirty_stages |= set_layout->dynamic_offset_stages[i] & stages;
897 }
898 }
899
900 *dynamic_offsets += set_layout->dynamic_offset_count;
901 *dynamic_offset_count -= set_layout->dynamic_offset_count;
902 }
903 }
904
905 if (set->is_push)
906 cmd_buffer->state.push_descriptors_dirty |= dirty_stages;
907 else
908 cmd_buffer->state.descriptors_dirty |= dirty_stages;
909 cmd_buffer->state.push_constants_dirty |= dirty_stages;
910 pipe_state->push_constants_data_dirty = true;
911 }
912
913 #define ANV_GRAPHICS_STAGE_BITS \
914 (VK_SHADER_STAGE_ALL_GRAPHICS | \
915 VK_SHADER_STAGE_MESH_BIT_EXT | \
916 VK_SHADER_STAGE_TASK_BIT_EXT)
917
918 #define ANV_RT_STAGE_BITS \
919 (VK_SHADER_STAGE_RAYGEN_BIT_KHR | \
920 VK_SHADER_STAGE_ANY_HIT_BIT_KHR | \
921 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | \
922 VK_SHADER_STAGE_MISS_BIT_KHR | \
923 VK_SHADER_STAGE_INTERSECTION_BIT_KHR | \
924 VK_SHADER_STAGE_CALLABLE_BIT_KHR)
925
926 void anv_CmdBindDescriptorSets2KHR(
927 VkCommandBuffer commandBuffer,
928 const VkBindDescriptorSetsInfoKHR* pInfo)
929 {
930 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
931 ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pInfo->layout);
932 struct anv_pipeline_sets_layout *layout = &pipeline_layout->sets_layout;
933
934 assert(pInfo->firstSet + pInfo->descriptorSetCount <= MAX_SETS);
935
936 if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
937 uint32_t dynamicOffsetCount = pInfo->dynamicOffsetCount;
938 const uint32_t *pDynamicOffsets = pInfo->pDynamicOffsets;
939
940 for (uint32_t i = 0; i < pInfo->descriptorSetCount; i++) {
941 ANV_FROM_HANDLE(anv_descriptor_set, set, pInfo->pDescriptorSets[i]);
942 if (set == NULL)
943 continue;
944 anv_cmd_buffer_bind_descriptor_set(cmd_buffer,
945 VK_PIPELINE_BIND_POINT_COMPUTE,
946 layout, pInfo->firstSet + i, set,
947 &dynamicOffsetCount,
948 &pDynamicOffsets);
949 }
950 }
951 if (pInfo->stageFlags & ANV_GRAPHICS_STAGE_BITS) {
952 uint32_t dynamicOffsetCount = pInfo->dynamicOffsetCount;
953 const uint32_t *pDynamicOffsets = pInfo->pDynamicOffsets;
954
955 for (uint32_t i = 0; i < pInfo->descriptorSetCount; i++) {
956 ANV_FROM_HANDLE(anv_descriptor_set, set, pInfo->pDescriptorSets[i]);
957 if (set == NULL)
958 continue;
959 anv_cmd_buffer_bind_descriptor_set(cmd_buffer,
960 VK_PIPELINE_BIND_POINT_GRAPHICS,
961 layout, pInfo->firstSet + i, set,
962 &dynamicOffsetCount,
963 &pDynamicOffsets);
964 }
965 }
966 if (pInfo->stageFlags & ANV_RT_STAGE_BITS) {
967 uint32_t dynamicOffsetCount = pInfo->dynamicOffsetCount;
968 const uint32_t *pDynamicOffsets = pInfo->pDynamicOffsets;
969
970 for (uint32_t i = 0; i < pInfo->descriptorSetCount; i++) {
971 ANV_FROM_HANDLE(anv_descriptor_set, set, pInfo->pDescriptorSets[i]);
972 if (set == NULL)
973 continue;
974 anv_cmd_buffer_bind_descriptor_set(cmd_buffer,
975 VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR,
976 layout, pInfo->firstSet + i, set,
977 &dynamicOffsetCount,
978 &pDynamicOffsets);
979 }
980 }
981 }
982
983 void anv_CmdBindDescriptorBuffersEXT(
984 VkCommandBuffer commandBuffer,
985 uint32_t bufferCount,
986 const VkDescriptorBufferBindingInfoEXT* pBindingInfos)
987 {
988 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
989 struct anv_cmd_state *state = &cmd_buffer->state;
990
991 for (uint32_t i = 0; i < bufferCount; i++) {
992 assert(pBindingInfos[i].address >= cmd_buffer->device->physical->va.dynamic_visible_pool.addr &&
993 pBindingInfos[i].address < (cmd_buffer->device->physical->va.dynamic_visible_pool.addr +
994 cmd_buffer->device->physical->va.dynamic_visible_pool.size));
995
996 if (state->descriptor_buffers.address[i] != pBindingInfos[i].address) {
997 state->descriptor_buffers.address[i] = pBindingInfos[i].address;
998 if (pBindingInfos[i].usage & VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT)
999 state->descriptor_buffers.surfaces_address = pBindingInfos[i].address;
1000 if (pBindingInfos[i].usage & VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT)
1001 state->descriptor_buffers.samplers_address = pBindingInfos[i].address;
1002 state->descriptor_buffers.dirty = true;
1003 state->descriptor_buffers.offsets_dirty = ~0;
1004 }
1005 }
1006
1007 anv_cmd_buffer_maybe_dirty_descriptor_mode(cmd_buffer,
1008 ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER);
1009 }
1010
1011 static void
1012 anv_cmd_buffer_set_descriptor_buffer_offsets(struct anv_cmd_buffer *cmd_buffer,
1013 VkPipelineBindPoint bind_point,
1014 struct anv_pipeline_layout *layout,
1015 uint32_t first_set,
1016 uint32_t set_count,
1017 const VkDeviceSize *buffer_offsets,
1018 const uint32_t *buffer_indices)
1019 {
1020 for (uint32_t i = 0; i < set_count; i++) {
1021 const uint32_t set_index = first_set + i;
1022
1023 const struct anv_descriptor_set_layout *set_layout =
1024 layout->sets_layout.set[set_index].layout;
1025 VkShaderStageFlags stages;
1026 struct anv_cmd_pipeline_state *pipe_state =
1027 anv_cmd_buffer_get_pipeline_layout_state(cmd_buffer, bind_point,
1028 set_layout, &stages);
1029
1030 if (buffer_offsets[i] != pipe_state->descriptor_buffers[set_index].buffer_offset ||
1031 buffer_indices[i] != pipe_state->descriptor_buffers[set_index].buffer_index ||
1032 !pipe_state->descriptor_buffers[set_index].bound) {
1033 pipe_state->descriptor_buffers[set_index].buffer_index = buffer_indices[i];
1034 pipe_state->descriptor_buffers[set_index].buffer_offset = buffer_offsets[i];
1035 cmd_buffer->state.descriptors_dirty |= stages;
1036 cmd_buffer->state.descriptor_buffers.offsets_dirty |= stages;
1037 }
1038 pipe_state->descriptor_buffers[set_index].bound = true;
1039 }
1040 }
1041
1042 void anv_CmdSetDescriptorBufferOffsets2EXT(
1043 VkCommandBuffer commandBuffer,
1044 const VkSetDescriptorBufferOffsetsInfoEXT* pSetDescriptorBufferOffsetsInfo)
1045 {
1046 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1047 ANV_FROM_HANDLE(anv_pipeline_layout, layout, pSetDescriptorBufferOffsetsInfo->layout);
1048
1049 if (pSetDescriptorBufferOffsetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
1050 anv_cmd_buffer_set_descriptor_buffer_offsets(cmd_buffer,
1051 VK_PIPELINE_BIND_POINT_COMPUTE,
1052 layout,
1053 pSetDescriptorBufferOffsetsInfo->firstSet,
1054 pSetDescriptorBufferOffsetsInfo->setCount,
1055 pSetDescriptorBufferOffsetsInfo->pOffsets,
1056 pSetDescriptorBufferOffsetsInfo->pBufferIndices);
1057 }
1058 if (pSetDescriptorBufferOffsetsInfo->stageFlags & ANV_GRAPHICS_STAGE_BITS) {
1059 anv_cmd_buffer_set_descriptor_buffer_offsets(cmd_buffer,
1060 VK_PIPELINE_BIND_POINT_GRAPHICS,
1061 layout,
1062 pSetDescriptorBufferOffsetsInfo->firstSet,
1063 pSetDescriptorBufferOffsetsInfo->setCount,
1064 pSetDescriptorBufferOffsetsInfo->pOffsets,
1065 pSetDescriptorBufferOffsetsInfo->pBufferIndices);
1066 }
1067 if (pSetDescriptorBufferOffsetsInfo->stageFlags & ANV_RT_STAGE_BITS) {
1068 anv_cmd_buffer_set_descriptor_buffer_offsets(cmd_buffer,
1069 VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR,
1070 layout,
1071 pSetDescriptorBufferOffsetsInfo->firstSet,
1072 pSetDescriptorBufferOffsetsInfo->setCount,
1073 pSetDescriptorBufferOffsetsInfo->pOffsets,
1074 pSetDescriptorBufferOffsetsInfo->pBufferIndices);
1075 }
1076 }
1077
1078 void anv_CmdBindDescriptorBufferEmbeddedSamplers2EXT(
1079 VkCommandBuffer commandBuffer,
1080 const VkBindDescriptorBufferEmbeddedSamplersInfoEXT* pBindDescriptorBufferEmbeddedSamplersInfo)
1081 {
1082 /* no-op */
1083 }
1084
1085 void anv_CmdBindVertexBuffers2(
1086 VkCommandBuffer commandBuffer,
1087 uint32_t firstBinding,
1088 uint32_t bindingCount,
1089 const VkBuffer* pBuffers,
1090 const VkDeviceSize* pOffsets,
1091 const VkDeviceSize* pSizes,
1092 const VkDeviceSize* pStrides)
1093 {
1094 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1095 struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings;
1096
1097 /* We have to defer setting up vertex buffers since we need the buffer
1098 * stride from the pipeline. */
1099
1100 assert(firstBinding + bindingCount <= MAX_VBS);
1101 for (uint32_t i = 0; i < bindingCount; i++) {
1102 ANV_FROM_HANDLE(anv_buffer, buffer, pBuffers[i]);
1103
1104 if (buffer == NULL) {
1105 vb[firstBinding + i] = (struct anv_vertex_binding) {
1106 .buffer = NULL,
1107 };
1108 } else {
1109 vb[firstBinding + i] = (struct anv_vertex_binding) {
1110 .buffer = buffer,
1111 .offset = pOffsets[i],
1112 .size = vk_buffer_range(&buffer->vk, pOffsets[i],
1113 pSizes ? pSizes[i] : VK_WHOLE_SIZE),
1114 };
1115 }
1116 cmd_buffer->state.gfx.vb_dirty |= 1 << (firstBinding + i);
1117 }
1118
1119 if (pStrides != NULL) {
1120 vk_cmd_set_vertex_binding_strides(&cmd_buffer->vk, firstBinding,
1121 bindingCount, pStrides);
1122 }
1123 }
1124
1125 void anv_CmdBindTransformFeedbackBuffersEXT(
1126 VkCommandBuffer commandBuffer,
1127 uint32_t firstBinding,
1128 uint32_t bindingCount,
1129 const VkBuffer* pBuffers,
1130 const VkDeviceSize* pOffsets,
1131 const VkDeviceSize* pSizes)
1132 {
1133 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1134 struct anv_xfb_binding *xfb = cmd_buffer->state.xfb_bindings;
1135
1136 /* We have to defer setting up vertex buffers since we need the buffer
1137 * stride from the pipeline. */
1138
1139 assert(firstBinding + bindingCount <= MAX_XFB_BUFFERS);
1140 for (uint32_t i = 0; i < bindingCount; i++) {
1141 if (pBuffers[i] == VK_NULL_HANDLE) {
1142 xfb[firstBinding + i].buffer = NULL;
1143 } else {
1144 ANV_FROM_HANDLE(anv_buffer, buffer, pBuffers[i]);
1145 xfb[firstBinding + i].buffer = buffer;
1146 xfb[firstBinding + i].offset = pOffsets[i];
1147 xfb[firstBinding + i].size =
1148 vk_buffer_range(&buffer->vk, pOffsets[i],
1149 pSizes ? pSizes[i] : VK_WHOLE_SIZE);
1150 }
1151 }
1152 }
1153
1154 enum isl_format
1155 anv_isl_format_for_descriptor_type(const struct anv_device *device,
1156 VkDescriptorType type)
1157 {
1158 switch (type) {
1159 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1160 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1161 return device->physical->compiler->indirect_ubos_use_sampler ?
1162 ISL_FORMAT_R32G32B32A32_FLOAT : ISL_FORMAT_RAW;
1163
1164 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1165 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1166 return ISL_FORMAT_RAW;
1167
1168 default:
1169 unreachable("Invalid descriptor type");
1170 }
1171 }
1172
1173 struct anv_state
1174 anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
1175 const void *data, uint32_t size, uint32_t alignment)
1176 {
1177 struct anv_state state;
1178
1179 state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);
1180 memcpy(state.map, data, size);
1181
1182 VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size));
1183
1184 return state;
1185 }
1186
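/* ORs two partially packed dword streams together (e.g. a statically packed
 * state packet merged with its dynamically packed counterpart); the assert
 * checks that the two halves never set overlapping bits.
 */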
1187 struct anv_state
1188 anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
1189 uint32_t *a, uint32_t *b,
1190 uint32_t dwords, uint32_t alignment)
1191 {
1192 struct anv_state state;
1193 uint32_t *p;
1194
1195 state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
1196 dwords * 4, alignment);
1197 p = state.map;
1198 for (uint32_t i = 0; i < dwords; i++) {
1199 assert((a[i] & b[i]) == 0);
1200 p[i] = a[i] | b[i];
1201 }
1202
1203 VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
1204
1205 return state;
1206 }
1207
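/* Builds the graphics push constant buffer. The layout mirrors struct
 * anv_push_constants: the application's client_data comes first (only
 * push_constants_client_size bytes of it are copied), followed by the
 * driver-internal fields starting at desc_surface_offsets.
 */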
1208 struct anv_state
1209 anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer)
1210 {
1211 const struct anv_push_constants *data =
1212 &cmd_buffer->state.gfx.base.push_constants;
1213
1214 struct anv_state state =
1215 anv_cmd_buffer_alloc_temporary_state(cmd_buffer,
1216 sizeof(struct anv_push_constants),
1217 32 /* bottom 5 bits MBZ */);
1218 if (state.alloc_size == 0)
1219 return state;
1220
1221 memcpy(state.map, data->client_data,
1222 cmd_buffer->state.gfx.base.push_constants_client_size);
1223 memcpy(state.map + sizeof(data->client_data),
1224 &data->desc_surface_offsets,
1225 sizeof(struct anv_push_constants) - sizeof(data->client_data));
1226
1227 return state;
1228 }
1229
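/* Builds the compute push constant buffer: a single cross-thread section
 * followed by the per-thread section replicated dispatch.threads times,
 * with cs.subgroup_id patched to the thread index in each copy.
 */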
1230 struct anv_state
1231 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
1232 {
1233 const struct intel_device_info *devinfo = cmd_buffer->device->info;
1234 struct anv_cmd_pipeline_state *pipe_state = &cmd_buffer->state.compute.base;
1235 struct anv_push_constants *data = &pipe_state->push_constants;
1236 struct anv_compute_pipeline *pipeline =
1237 anv_pipeline_to_compute(cmd_buffer->state.compute.base.pipeline);
1238 const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
1239 const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0];
1240
1241 const struct intel_cs_dispatch_info dispatch =
1242 brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
1243 const unsigned total_push_constants_size =
1244 brw_cs_push_const_total_size(cs_prog_data, dispatch.threads);
1245 if (total_push_constants_size == 0)
1246 return (struct anv_state) { .offset = 0 };
1247
1248 const unsigned push_constant_alignment = 64;
1249 const unsigned aligned_total_push_constants_size =
1250 ALIGN(total_push_constants_size, push_constant_alignment);
1251 struct anv_state state;
1252 if (devinfo->verx10 >= 125) {
1253 state = anv_state_stream_alloc(&cmd_buffer->general_state_stream,
1254 aligned_total_push_constants_size,
1255 push_constant_alignment);
1256 } else {
1257 state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
1258 aligned_total_push_constants_size,
1259 push_constant_alignment);
1260 }
1261 if (state.map == NULL)
1262 return state;
1263
1264 void *dst = state.map;
1265 const void *src = (char *)data + (range->start * 32);
1266
1267 if (cs_prog_data->push.cross_thread.size > 0) {
1268 memcpy(dst, src, cs_prog_data->push.cross_thread.size);
1269 dst += cs_prog_data->push.cross_thread.size;
1270 src += cs_prog_data->push.cross_thread.size;
1271 }
1272
1273 if (cs_prog_data->push.per_thread.size > 0) {
1274 for (unsigned t = 0; t < dispatch.threads; t++) {
1275 memcpy(dst, src, cs_prog_data->push.per_thread.size);
1276
1277 uint32_t *subgroup_id = dst +
1278 offsetof(struct anv_push_constants, cs.subgroup_id) -
1279 (range->start * 32 + cs_prog_data->push.cross_thread.size);
1280 *subgroup_id = t;
1281
1282 dst += cs_prog_data->push.per_thread.size;
1283 }
1284 }
1285
1286 return state;
1287 }
1288
1289 void anv_CmdPushConstants2KHR(
1290 VkCommandBuffer commandBuffer,
1291 const VkPushConstantsInfoKHR* pInfo)
1292 {
1293 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1294
1295 if (pInfo->stageFlags & ANV_GRAPHICS_STAGE_BITS) {
1296 struct anv_cmd_pipeline_state *pipe_state =
1297 &cmd_buffer->state.gfx.base;
1298
1299 memcpy(pipe_state->push_constants.client_data + pInfo->offset,
1300 pInfo->pValues, pInfo->size);
1301 pipe_state->push_constants_data_dirty = true;
1302 pipe_state->push_constants_client_size = MAX2(
1303 pipe_state->push_constants_client_size, pInfo->offset + pInfo->size);
1304 }
1305 if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
1306 struct anv_cmd_pipeline_state *pipe_state =
1307 &cmd_buffer->state.compute.base;
1308
1309 memcpy(pipe_state->push_constants.client_data + pInfo->offset,
1310 pInfo->pValues, pInfo->size);
1311 pipe_state->push_constants_data_dirty = true;
1312 pipe_state->push_constants_client_size = MAX2(
1313 pipe_state->push_constants_client_size, pInfo->offset + pInfo->size);
1314 }
1315 if (pInfo->stageFlags & ANV_RT_STAGE_BITS) {
1316 struct anv_cmd_pipeline_state *pipe_state =
1317 &cmd_buffer->state.rt.base;
1318
1319 memcpy(pipe_state->push_constants.client_data + pInfo->offset,
1320 pInfo->pValues, pInfo->size);
1321 pipe_state->push_constants_data_dirty = true;
1322 pipe_state->push_constants_client_size = MAX2(
1323 pipe_state->push_constants_client_size, pInfo->offset + pInfo->size);
1324 }
1325
1326 cmd_buffer->state.push_constants_dirty |= pInfo->stageFlags;
1327 }
1328
1329 static struct anv_cmd_pipeline_state *
1330 anv_cmd_buffer_get_pipe_state(struct anv_cmd_buffer *cmd_buffer,
1331 VkPipelineBindPoint bind_point)
1332 {
1333 switch (bind_point) {
1334 case VK_PIPELINE_BIND_POINT_GRAPHICS:
1335 return &cmd_buffer->state.gfx.base;
1336 case VK_PIPELINE_BIND_POINT_COMPUTE:
1337 return &cmd_buffer->state.compute.base;
1338 case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
1339 return &cmd_buffer->state.rt.base;
1340 break;
1341 default:
1342 unreachable("invalid bind point");
1343 }
1344 }
1345
1346 static void
1347 anv_cmd_buffer_push_descriptor_sets(struct anv_cmd_buffer *cmd_buffer,
1348 VkPipelineBindPoint bind_point,
1349 const VkPushDescriptorSetInfoKHR *pInfo)
1350 {
1351 ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pInfo->layout);
1352 struct anv_pipeline_sets_layout *layout = &pipeline_layout->sets_layout;
1353
1354 assert(pInfo->set < MAX_SETS);
1355
1356 struct anv_descriptor_set_layout *set_layout = layout->set[pInfo->set].layout;
1357 struct anv_push_descriptor_set *push_set =
1358 &anv_cmd_buffer_get_pipe_state(cmd_buffer,
1359 bind_point)->push_descriptor;
1360 if (!anv_push_descriptor_set_init(cmd_buffer, push_set, set_layout))
1361 return;
1362
1363 anv_descriptor_set_write(cmd_buffer->device, &push_set->set,
1364 pInfo->descriptorWriteCount,
1365 pInfo->pDescriptorWrites);
1366
1367 anv_cmd_buffer_bind_descriptor_set(cmd_buffer, bind_point,
1368 layout, pInfo->set, &push_set->set,
1369 NULL, NULL);
1370 }
1371
1372 void anv_CmdPushDescriptorSet2KHR(
1373 VkCommandBuffer commandBuffer,
1374 const VkPushDescriptorSetInfoKHR* pInfo)
1375 {
1376 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1377
1378 if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
1379 anv_cmd_buffer_push_descriptor_sets(cmd_buffer,
1380 VK_PIPELINE_BIND_POINT_COMPUTE,
1381 pInfo);
1382 if (pInfo->stageFlags & ANV_GRAPHICS_STAGE_BITS)
1383 anv_cmd_buffer_push_descriptor_sets(cmd_buffer,
1384 VK_PIPELINE_BIND_POINT_GRAPHICS,
1385 pInfo);
1386 if (pInfo->stageFlags & ANV_RT_STAGE_BITS)
1387 anv_cmd_buffer_push_descriptor_sets(cmd_buffer,
1388 VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR,
1389 pInfo);
1390 }
1391
1392 void anv_CmdPushDescriptorSetWithTemplate2KHR(
1393 VkCommandBuffer commandBuffer,
1394 const VkPushDescriptorSetWithTemplateInfoKHR* pInfo)
1395 {
1396 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1397 VK_FROM_HANDLE(vk_descriptor_update_template, template,
1398 pInfo->descriptorUpdateTemplate);
1399 ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pInfo->layout);
1400 struct anv_pipeline_sets_layout *layout = &pipeline_layout->sets_layout;
1401
1402 assert(pInfo->set < MAX_PUSH_DESCRIPTORS);
1403
1404 struct anv_descriptor_set_layout *set_layout = layout->set[pInfo->set].layout;
1405 UNUSED VkShaderStageFlags stages;
1406 struct anv_cmd_pipeline_state *pipe_state =
1407 anv_cmd_buffer_get_pipeline_layout_state(cmd_buffer, template->bind_point,
1408 set_layout, &stages);
1409 struct anv_push_descriptor_set *push_set = &pipe_state->push_descriptor;
1410 if (!anv_push_descriptor_set_init(cmd_buffer, push_set, set_layout))
1411 return;
1412
1413 anv_descriptor_set_write_template(cmd_buffer->device, &push_set->set,
1414 template,
1415 pInfo->pData);
1416
1417 anv_cmd_buffer_bind_descriptor_set(cmd_buffer, template->bind_point,
1418 layout, pInfo->set, &push_set->set,
1419 NULL, NULL);
1420 }
1421
1422 void anv_CmdSetRayTracingPipelineStackSizeKHR(
1423 VkCommandBuffer commandBuffer,
1424 uint32_t pipelineStackSize)
1425 {
1426 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1427 struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;
1428 struct anv_device *device = cmd_buffer->device;
1429
1430 if (anv_batch_has_error(&cmd_buffer->batch))
1431 return;
1432
1433 uint32_t stack_ids_per_dss = 2048; /* TODO */
1434
1435 unsigned stack_size_log2 = util_logbase2_ceil(pipelineStackSize);
1436 if (stack_size_log2 < 10)
1437 stack_size_log2 = 10;
1438
1439 if (rt->scratch.layout.total_size == 1 << stack_size_log2)
1440 return;
1441
1442 brw_rt_compute_scratch_layout(&rt->scratch.layout, device->info,
1443 stack_ids_per_dss, 1 << stack_size_log2);
1444
1445 unsigned bucket = stack_size_log2 - 10;
1446 assert(bucket < ARRAY_SIZE(device->rt_scratch_bos));
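   /* Worked example (illustrative): pipelineStackSize = 4096 gives
    * stack_size_log2 = 12 and bucket = 2; anything below 1 KiB is rounded up
    * to the 1 KiB minimum (bucket 0).
    */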
1447
1448 struct anv_bo *bo = p_atomic_read(&device->rt_scratch_bos[bucket]);
1449 if (bo == NULL) {
1450 struct anv_bo *new_bo;
1451 VkResult result = anv_device_alloc_bo(device, "RT scratch",
1452 rt->scratch.layout.total_size,
1453 ANV_BO_ALLOC_INTERNAL, /* alloc_flags */
1454 0, /* explicit_address */
1455 &new_bo);
1456 if (result != VK_SUCCESS) {
1457 rt->scratch.layout.total_size = 0;
1458 anv_batch_set_error(&cmd_buffer->batch, result);
1459 return;
1460 }
1461
1462 bo = p_atomic_cmpxchg(&device->rt_scratch_bos[bucket], NULL, new_bo);
1463 if (bo != NULL) {
1464 anv_device_release_bo(device, bo);
1465 } else {
1466 bo = new_bo;
1467 }
1468 }
1469
1470 rt->scratch.bo = bo;
1471 }
1472
1473 void
1474 anv_cmd_buffer_save_state(struct anv_cmd_buffer *cmd_buffer,
1475 uint32_t flags,
1476 struct anv_cmd_saved_state *state)
1477 {
1478 state->flags = flags;
1479
1480 /* we only support the compute pipeline at the moment */
1481 assert(state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE);
1482 const struct anv_cmd_pipeline_state *pipe_state =
1483 &cmd_buffer->state.compute.base;
1484
1485 if (state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE)
1486 state->pipeline = pipe_state->pipeline;
1487
1488 if (state->flags & ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0)
1489 state->descriptor_set[0] = pipe_state->descriptors[0];
1490
1491 if (state->flags & ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL) {
1492 for (uint32_t i = 0; i < MAX_SETS; i++) {
1493 state->descriptor_set[i] = pipe_state->descriptors[i];
1494 }
1495 }
1496
1497 if (state->flags & ANV_CMD_SAVED_STATE_PUSH_CONSTANTS) {
1498 memcpy(state->push_constants, pipe_state->push_constants.client_data,
1499 sizeof(state->push_constants));
1500 }
1501 }
1502
1503 void
1504 anv_cmd_buffer_restore_state(struct anv_cmd_buffer *cmd_buffer,
1505 struct anv_cmd_saved_state *state)
1506 {
1507 VkCommandBuffer cmd_buffer_ = anv_cmd_buffer_to_handle(cmd_buffer);
1508
1509 assert(state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE);
1510 const VkPipelineBindPoint bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
1511 const VkShaderStageFlags stage_flags = VK_SHADER_STAGE_COMPUTE_BIT;
1512 struct anv_cmd_pipeline_state *pipe_state = &cmd_buffer->state.compute.base;
1513
1514 if (state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE) {
1515 if (state->pipeline) {
1516 anv_CmdBindPipeline(cmd_buffer_, bind_point,
1517 anv_pipeline_to_handle(state->pipeline));
1518 } else {
1519 pipe_state->pipeline = NULL;
1520 }
1521 }
1522
1523 if (state->flags & ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0) {
1524 if (state->descriptor_set[0]) {
1525 anv_cmd_buffer_bind_descriptor_set(cmd_buffer, bind_point, NULL, 0,
1526 state->descriptor_set[0], NULL,
1527 NULL);
1528 } else {
1529 pipe_state->descriptors[0] = NULL;
1530 }
1531 }
1532
1533 if (state->flags & ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL) {
1534 for (uint32_t i = 0; i < MAX_SETS; i++) {
1535 if (state->descriptor_set[i]) {
1536 anv_cmd_buffer_bind_descriptor_set(cmd_buffer, bind_point, NULL, i,
1537 state->descriptor_set[i], NULL,
1538 NULL);
1539 } else {
1540 pipe_state->descriptors[i] = NULL;
1541 }
1542 }
1543 }
1544
1545 if (state->flags & ANV_CMD_SAVED_STATE_PUSH_CONSTANTS) {
1546 VkPushConstantsInfoKHR push_info = {
1547 .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
1548 .layout = VK_NULL_HANDLE,
1549 .stageFlags = stage_flags,
1550 .offset = 0,
1551 .size = sizeof(state->push_constants),
1552 .pValues = state->push_constants,
1553 };
1554 anv_CmdPushConstants2KHR(cmd_buffer_, &push_info);
1555 }
1556 }
1557
1558 void
1559 anv_cmd_write_buffer_cp(VkCommandBuffer commandBuffer,
1560 VkDeviceAddress dstAddr,
1561 void *data,
1562 uint32_t size)
1563 {
1564 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1565 anv_genX(cmd_buffer->device->info, cmd_write_buffer_cp)(cmd_buffer, dstAddr,
1566 data, size);
1567 }
1568
1569 void
1570 anv_cmd_flush_buffer_write_cp(VkCommandBuffer commandBuffer)
1571 {
1572 /* TODO: cmd_write_buffer_cp is implemented with MI store +
1573 * ForceWriteCompletionCheck so that should make the content globally
1574 * observable.
1575 *
1576 * If we encounter any functional or perf bottleneck issues, let's revisit
1577 * this helper and add ANV_PIPE_HDC_PIPELINE_FLUSH_BIT +
1578 * ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT +
1579 * ANV_PIPE_DATA_CACHE_FLUSH_BIT.
1580 */
1581 }
1582
1583 void
1584 anv_cmd_dispatch_unaligned(VkCommandBuffer commandBuffer,
1585 uint32_t invocations_x,
1586 uint32_t invocations_y,
1587 uint32_t invocations_z)
1588 {
1589 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1590
1591 anv_genX(cmd_buffer->device->info, cmd_dispatch_unaligned)
1592 (commandBuffer, invocations_x, invocations_y, invocations_z);
1593 }
1594