1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31 #include "anv_measure.h"
32
33 #include "vk_util.h"
34
35 /** \file anv_cmd_buffer.c
36 *
37 * This file contains all of the stuff for emitting commands into a command
38 * buffer. This includes implementations of most of the vkCmd*
39 * entrypoints. This file is concerned entirely with state emission and
40 * not with the command buffer data structure itself. As far as this file
41 * is concerned, most of anv_cmd_buffer is magic.
42 */
43
44 static void
anv_cmd_state_init(struct anv_cmd_buffer *cmd_buffer)
46 {
47 struct anv_cmd_state *state = &cmd_buffer->state;
48
49 memset(state, 0, sizeof(*state));
50
51 state->current_pipeline = UINT32_MAX;
52 state->gfx.restart_index = UINT32_MAX;
53 state->gfx.object_preemption = true;
54 state->gfx.dirty = 0;
55
56 memcpy(state->gfx.dyn_state.dirty,
57 cmd_buffer->device->gfx_dirty_state,
58 sizeof(state->gfx.dyn_state.dirty));
59 }
60
61 static void
anv_cmd_pipeline_state_finish(struct anv_cmd_buffer *cmd_buffer,
                              struct anv_cmd_pipeline_state *pipe_state)
64 {
65 anv_push_descriptor_set_finish(&pipe_state->push_descriptor);
66 }
67
68 static void
anv_cmd_state_finish(struct anv_cmd_buffer *cmd_buffer)
70 {
71 struct anv_cmd_state *state = &cmd_buffer->state;
72
73 anv_cmd_pipeline_state_finish(cmd_buffer, &state->gfx.base);
74 anv_cmd_pipeline_state_finish(cmd_buffer, &state->compute.base);
75 }
76
77 static void
anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer)
79 {
80 anv_cmd_state_finish(cmd_buffer);
81 anv_cmd_state_init(cmd_buffer);
82
83 cmd_buffer->last_compute_walker = NULL;
84 cmd_buffer->last_indirect_dispatch = NULL;
85 }
86
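/* Lazily create the companion RCS command buffer, used when a command buffer
 * recorded for another engine also needs work emitted on the render engine.
 * The device mutex is taken here since the device-level companion command
 * pool is shared between command buffers.
 */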
87 VkResult
anv_cmd_buffer_ensure_rcs_companion(struct anv_cmd_buffer *cmd_buffer)
89 {
90 if (cmd_buffer->companion_rcs_cmd_buffer)
91 return VK_SUCCESS;
92
93 VkResult result = VK_SUCCESS;
94 pthread_mutex_lock(&cmd_buffer->device->mutex);
95 VK_FROM_HANDLE(vk_command_pool, pool,
96 cmd_buffer->device->companion_rcs_cmd_pool);
97 assert(pool != NULL);
98
99 struct vk_command_buffer *tmp_cmd_buffer = NULL;
100 result = pool->command_buffer_ops->create(pool, &tmp_cmd_buffer);
101
102 if (result != VK_SUCCESS)
103 goto unlock_and_return;
104
105 cmd_buffer->companion_rcs_cmd_buffer =
106 container_of(tmp_cmd_buffer, struct anv_cmd_buffer, vk);
107 cmd_buffer->companion_rcs_cmd_buffer->vk.level = cmd_buffer->vk.level;
108 cmd_buffer->companion_rcs_cmd_buffer->is_companion_rcs_cmd_buffer = true;
109
110 unlock_and_return:
111 pthread_mutex_unlock(&cmd_buffer->device->mutex);
112 return result;
113 }
114
115 static VkResult
anv_create_cmd_buffer(struct vk_command_pool *pool,
                      struct vk_command_buffer **cmd_buffer_out)
118 {
119 struct anv_device *device =
120 container_of(pool->base.device, struct anv_device, vk);
121 struct anv_cmd_buffer *cmd_buffer;
122 VkResult result;
123
124 cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8,
125 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
126 if (cmd_buffer == NULL)
127 return vk_error(pool, VK_ERROR_OUT_OF_HOST_MEMORY);
128
129 result = vk_command_buffer_init(pool, &cmd_buffer->vk,
130 &anv_cmd_buffer_ops, 0);
131 if (result != VK_SUCCESS)
132 goto fail_alloc;
133
134 cmd_buffer->vk.dynamic_graphics_state.ms.sample_locations =
135 &cmd_buffer->state.gfx.sample_locations;
136 cmd_buffer->vk.dynamic_graphics_state.vi =
137 &cmd_buffer->state.gfx.vertex_input;
138
139 cmd_buffer->batch.status = VK_SUCCESS;
140 cmd_buffer->generation.batch.status = VK_SUCCESS;
141
142 cmd_buffer->device = device;
143
144 assert(pool->queue_family_index < device->physical->queue.family_count);
145 cmd_buffer->queue_family =
146 &device->physical->queue.families[pool->queue_family_index];
147
148 result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer);
149 if (result != VK_SUCCESS)
150 goto fail_vk;
151
152 anv_state_stream_init(&cmd_buffer->surface_state_stream,
153 &device->internal_surface_state_pool, 4096);
154 anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
155 &device->dynamic_state_pool, 16384);
156 anv_state_stream_init(&cmd_buffer->general_state_stream,
157 &device->general_state_pool, 16384);
158 anv_state_stream_init(&cmd_buffer->indirect_push_descriptor_stream,
159 &device->indirect_push_descriptor_pool, 4096);
160
161 int success = u_vector_init_pow2(&cmd_buffer->dynamic_bos, 8,
162 sizeof(struct anv_bo *));
163 if (!success)
164 goto fail_batch_bo;
165
166 cmd_buffer->self_mod_locations = NULL;
167 cmd_buffer->companion_rcs_cmd_buffer = NULL;
168 cmd_buffer->is_companion_rcs_cmd_buffer = false;
169
170 cmd_buffer->generation.jump_addr = ANV_NULL_ADDRESS;
171 cmd_buffer->generation.return_addr = ANV_NULL_ADDRESS;
172
173 cmd_buffer->last_compute_walker = NULL;
174 cmd_buffer->last_indirect_dispatch = NULL;
175
176 memset(&cmd_buffer->generation.shader_state, 0,
177 sizeof(cmd_buffer->generation.shader_state));
178
179 anv_cmd_state_init(cmd_buffer);
180
181 anv_measure_init(cmd_buffer);
182
183 u_trace_init(&cmd_buffer->trace, &device->ds.trace_context);
184
185 *cmd_buffer_out = &cmd_buffer->vk;
186
187 return VK_SUCCESS;
188
189 fail_batch_bo:
190 anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);
191 fail_vk:
192 vk_command_buffer_finish(&cmd_buffer->vk);
193 fail_alloc:
194 vk_free2(&device->vk.alloc, &pool->alloc, cmd_buffer);
195
196 return result;
197 }
198
199 static void
destroy_cmd_buffer(struct anv_cmd_buffer *cmd_buffer)
201 {
202 u_trace_fini(&cmd_buffer->trace);
203
204 anv_measure_destroy(cmd_buffer);
205
206 anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);
207
208 anv_state_stream_finish(&cmd_buffer->surface_state_stream);
209 anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
210 anv_state_stream_finish(&cmd_buffer->general_state_stream);
211 anv_state_stream_finish(&cmd_buffer->indirect_push_descriptor_stream);
212
213 while (u_vector_length(&cmd_buffer->dynamic_bos) > 0) {
214 struct anv_bo **bo = u_vector_remove(&cmd_buffer->dynamic_bos);
215 anv_bo_pool_free((*bo)->map != NULL ?
216 &cmd_buffer->device->batch_bo_pool :
217 &cmd_buffer->device->bvh_bo_pool, *bo);
218 }
219 u_vector_finish(&cmd_buffer->dynamic_bos);
220
221 anv_cmd_state_finish(cmd_buffer);
222
223 vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer->self_mod_locations);
224
225 vk_command_buffer_finish(&cmd_buffer->vk);
226 vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer);
227 }
228
229 static void
anv_cmd_buffer_destroy(struct vk_command_buffer *vk_cmd_buffer)
231 {
232 struct anv_cmd_buffer *cmd_buffer =
233 container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
234 struct anv_device *device = cmd_buffer->device;
235
236 pthread_mutex_lock(&device->mutex);
237 if (cmd_buffer->companion_rcs_cmd_buffer) {
238 destroy_cmd_buffer(cmd_buffer->companion_rcs_cmd_buffer);
239 cmd_buffer->companion_rcs_cmd_buffer = NULL;
240 }
241
242 ANV_RMV(cmd_buffer_destroy, cmd_buffer->device, cmd_buffer);
243
244 destroy_cmd_buffer(cmd_buffer);
245 pthread_mutex_unlock(&device->mutex);
246 }
247
248 static void
reset_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
                 UNUSED VkCommandBufferResetFlags flags)
251 {
252 vk_command_buffer_reset(&cmd_buffer->vk);
253
254 cmd_buffer->usage_flags = 0;
255 cmd_buffer->perf_query_pool = NULL;
256 cmd_buffer->is_companion_rcs_cmd_buffer = false;
257 anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer);
258 anv_cmd_state_reset(cmd_buffer);
259
260 memset(&cmd_buffer->generation.shader_state, 0,
261 sizeof(cmd_buffer->generation.shader_state));
262
263 cmd_buffer->generation.jump_addr = ANV_NULL_ADDRESS;
264 cmd_buffer->generation.return_addr = ANV_NULL_ADDRESS;
265
266 anv_state_stream_finish(&cmd_buffer->surface_state_stream);
267 anv_state_stream_init(&cmd_buffer->surface_state_stream,
268 &cmd_buffer->device->internal_surface_state_pool, 4096);
269
270 anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
271 anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
272 &cmd_buffer->device->dynamic_state_pool, 16384);
273
274 anv_state_stream_finish(&cmd_buffer->general_state_stream);
275 anv_state_stream_init(&cmd_buffer->general_state_stream,
276 &cmd_buffer->device->general_state_pool, 16384);
277
278 anv_state_stream_finish(&cmd_buffer->indirect_push_descriptor_stream);
279 anv_state_stream_init(&cmd_buffer->indirect_push_descriptor_stream,
280 &cmd_buffer->device->indirect_push_descriptor_pool,
281 4096);
282
283 while (u_vector_length(&cmd_buffer->dynamic_bos) > 0) {
284 struct anv_bo **bo = u_vector_remove(&cmd_buffer->dynamic_bos);
anv_bo_pool_free((*bo)->map != NULL ?
                 &cmd_buffer->device->batch_bo_pool :
                 &cmd_buffer->device->bvh_bo_pool, *bo);
286 }
287
288 anv_measure_reset(cmd_buffer);
289
290 u_trace_fini(&cmd_buffer->trace);
291 u_trace_init(&cmd_buffer->trace, &cmd_buffer->device->ds.trace_context);
292 }
293
294 void
anv_cmd_buffer_reset(struct vk_command_buffer *vk_cmd_buffer,
                     UNUSED VkCommandBufferResetFlags flags)
297 {
298 struct anv_cmd_buffer *cmd_buffer =
299 container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
300
301 if (cmd_buffer->companion_rcs_cmd_buffer) {
302 reset_cmd_buffer(cmd_buffer->companion_rcs_cmd_buffer, flags);
303 destroy_cmd_buffer(cmd_buffer->companion_rcs_cmd_buffer);
304 cmd_buffer->companion_rcs_cmd_buffer = NULL;
305 }
306
307 ANV_RMV(cmd_buffer_destroy, cmd_buffer->device, cmd_buffer);
308
309 reset_cmd_buffer(cmd_buffer, flags);
310 }
311
312 const struct vk_command_buffer_ops anv_cmd_buffer_ops = {
313 .create = anv_create_cmd_buffer,
314 .reset = anv_cmd_buffer_reset,
315 .destroy = anv_cmd_buffer_destroy,
316 };
317
318 void
anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
320 {
321 const struct intel_device_info *devinfo = cmd_buffer->device->info;
322 anv_genX(devinfo, cmd_buffer_emit_state_base_address)(cmd_buffer);
323 }
324
325 void
anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
                                  const struct anv_image *image,
                                  VkImageAspectFlagBits aspect,
                                  enum isl_aux_usage aux_usage,
                                  uint32_t level,
                                  uint32_t base_layer,
                                  uint32_t layer_count)
333 {
334 const struct intel_device_info *devinfo = cmd_buffer->device->info;
335 anv_genX(devinfo, cmd_buffer_mark_image_written)(cmd_buffer, image,
336 aspect, aux_usage,
337 level, base_layer,
338 layer_count);
339 }
340
341 void
anv_cmd_buffer_mark_image_fast_cleared(struct anv_cmd_buffer *cmd_buffer,
                                       const struct anv_image *image,
                                       const enum isl_format format,
                                       union isl_color_value clear_color)
346 {
347 const struct intel_device_info *devinfo = cmd_buffer->device->info;
348 anv_genX(devinfo, set_fast_clear_state)(cmd_buffer, image, format,
349 clear_color);
350 }
351
352 void
anv_cmd_buffer_load_clear_color_from_image(struct anv_cmd_buffer *cmd_buffer,
                                           struct anv_state state,
                                           const struct anv_image *image)
356 {
357 const struct intel_device_info *devinfo = cmd_buffer->device->info;
358 anv_genX(devinfo, load_image_clear_color)(cmd_buffer, state, image);
359 }
360
361 void
anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer)
363 {
364 const struct intel_device_info *devinfo = cmd_buffer->device->info;
365 anv_genX(devinfo, cmd_emit_conditional_render_predicate)(cmd_buffer);
366 }
367
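/* Clear the pending query-write bits whose corresponding flushes are covered
 * by the set of pipe control bits that just landed.
 */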
368 static void
clear_pending_query_bits(enum anv_query_bits *query_bits,
                         enum anv_pipe_bits flushed_bits)
371 {
372 if (flushed_bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
373 *query_bits &= ~ANV_QUERY_WRITES_RT_FLUSH;
374
375 if (flushed_bits & ANV_PIPE_TILE_CACHE_FLUSH_BIT)
376 *query_bits &= ~ANV_QUERY_WRITES_TILE_FLUSH;
377
378 if ((flushed_bits & ANV_PIPE_DATA_CACHE_FLUSH_BIT) &&
379 (flushed_bits & ANV_PIPE_HDC_PIPELINE_FLUSH_BIT) &&
380 (flushed_bits & ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT))
*query_bits &= ~ANV_QUERY_WRITES_DATA_FLUSH;
382
383 /* Once RT/TILE have been flushed, we can consider the CS_STALL flush */
384 if ((*query_bits & (ANV_QUERY_WRITES_TILE_FLUSH |
385 ANV_QUERY_WRITES_RT_FLUSH |
386 ANV_QUERY_WRITES_DATA_FLUSH)) == 0 &&
387 (flushed_bits & (ANV_PIPE_END_OF_PIPE_SYNC_BIT | ANV_PIPE_CS_STALL_BIT)))
388 *query_bits &= ~ANV_QUERY_WRITES_CS_STALL;
389 }
390
391 void
anv_cmd_buffer_update_pending_query_bits(struct anv_cmd_buffer *cmd_buffer,
                                         enum anv_pipe_bits flushed_bits)
394 {
395 clear_pending_query_bits(&cmd_buffer->state.queries.clear_bits, flushed_bits);
396 clear_pending_query_bits(&cmd_buffer->state.queries.buffer_write_bits, flushed_bits);
397 }
398
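/* Overwrite "dst" with "src" and report whether the contents actually
 * changed, so callers can skip dirtying state when the data is identical.
 */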
399 static bool
mem_update(void *dst, const void *src, size_t size)
401 {
402 if (memcmp(dst, src, size) == 0)
403 return false;
404
405 memcpy(dst, src, size);
406 return true;
407 }
408
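/* Compare the new shader's bind map hashes against what was last seen for
 * this stage and flag the descriptor/push-constant state dirty on any
 * mismatch.
 */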
409 static void
set_dirty_for_bind_map(struct anv_cmd_buffer *cmd_buffer,
                       gl_shader_stage stage,
                       const struct anv_pipeline_bind_map *map)
413 {
414 assert(stage < ARRAY_SIZE(cmd_buffer->state.surface_sha1s));
415 if (mem_update(cmd_buffer->state.surface_sha1s[stage],
416 map->surface_sha1, sizeof(map->surface_sha1)))
417 cmd_buffer->state.descriptors_dirty |= mesa_to_vk_shader_stage(stage);
418
419 assert(stage < ARRAY_SIZE(cmd_buffer->state.sampler_sha1s));
420 if (mem_update(cmd_buffer->state.sampler_sha1s[stage],
421 map->sampler_sha1, sizeof(map->sampler_sha1)))
422 cmd_buffer->state.descriptors_dirty |= mesa_to_vk_shader_stage(stage);
423
424 assert(stage < ARRAY_SIZE(cmd_buffer->state.push_sha1s));
425 if (mem_update(cmd_buffer->state.push_sha1s[stage],
426 map->push_sha1, sizeof(map->push_sha1)))
427 cmd_buffer->state.push_constants_dirty |= mesa_to_vk_shader_stage(stage);
428 }
429
430 static void
anv_cmd_buffer_set_ray_query_buffer(struct anv_cmd_buffer *cmd_buffer,
                                    struct anv_cmd_pipeline_state *pipeline_state,
                                    struct anv_pipeline *pipeline,
                                    VkShaderStageFlags stages)
435 {
436 struct anv_device *device = cmd_buffer->device;
437
438 uint64_t ray_shadow_size =
439 align64(brw_rt_ray_queries_shadow_stacks_size(device->info,
440 pipeline->ray_queries),
441 4096);
442 if (ray_shadow_size > 0 &&
443 (!cmd_buffer->state.ray_query_shadow_bo ||
444 cmd_buffer->state.ray_query_shadow_bo->size < ray_shadow_size)) {
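/* Shadow BOs are cached on the device in power-of-two size buckets, the
 * smallest being 64KB (1 << 16), so that later command buffers can reuse
 * them.
 */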
445 unsigned shadow_size_log2 = MAX2(util_logbase2_ceil(ray_shadow_size), 16);
446 unsigned bucket = shadow_size_log2 - 16;
447 assert(bucket < ARRAY_SIZE(device->ray_query_shadow_bos));
448
449 struct anv_bo *bo = p_atomic_read(&device->ray_query_shadow_bos[bucket]);
450 if (bo == NULL) {
451 struct anv_bo *new_bo;
452 VkResult result = anv_device_alloc_bo(device, "RT queries shadow",
453 ray_shadow_size,
454 0, /* alloc_flags */
455 0, /* explicit_address */
456 &new_bo);
457 if (result != VK_SUCCESS) {
458 anv_batch_set_error(&cmd_buffer->batch, result);
459 return;
460 }
461
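/* Try to publish the new BO; if another thread beat us to it, keep the one
 * already installed and release our redundant allocation.
 */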
462 bo = p_atomic_cmpxchg(&device->ray_query_shadow_bos[bucket], NULL, new_bo);
463 if (bo != NULL) {
anv_device_release_bo(device, new_bo);
465 } else {
466 bo = new_bo;
467 }
468 }
469 cmd_buffer->state.ray_query_shadow_bo = bo;
470
471 /* Add the ray query buffers to the batch list. */
472 anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
473 cmd_buffer->state.ray_query_shadow_bo);
474 }
475
/* Add the HW ray query buffer to the list of BOs used. */
477 anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
478 device->ray_query_bo);
479
480 /* Fill the push constants & mark them dirty. */
481 struct anv_state ray_query_global_state =
482 anv_genX(device->info, cmd_buffer_ray_query_globals)(cmd_buffer);
483
484 struct anv_address ray_query_globals_addr =
485 anv_state_pool_state_address(&device->dynamic_state_pool,
486 ray_query_global_state);
487 pipeline_state->push_constants.ray_query_globals =
488 anv_address_physical(ray_query_globals_addr);
489 cmd_buffer->state.push_constants_dirty |= stages;
490 }
491
/**
 * Compute the changes between two pipelines and flag the dirty HW state
 * appropriately.
 */
496 static void
anv_cmd_buffer_flush_pipeline_state(struct anv_cmd_buffer *cmd_buffer,
                                    struct anv_graphics_pipeline *old_pipeline,
                                    struct anv_graphics_pipeline *new_pipeline)
500 {
501 struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
502 struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
503
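/* diff_fix_state() compares fixed-size packed state between the old and new
 * pipeline, diff_var_state() handles variable-length state, and
 * assert_identical()/assert_empty() document invariants that must not differ
 * between pipelines.
 */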
504 #define diff_fix_state(bit, name) \
505 do { \
506 /* Fixed states should always have matching sizes */ \
507 assert(old_pipeline == NULL || \
508 old_pipeline->name.len == new_pipeline->name.len); \
509 /* Don't bother memcmp if the state is already dirty */ \
510 if (!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_##bit) && \
511 (old_pipeline == NULL || \
512 memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
513 &new_pipeline->batch_data[new_pipeline->name.offset], \
514 4 * new_pipeline->name.len) != 0)) \
515 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \
516 } while (0)
517 #define diff_var_state(bit, name) \
518 do { \
519 /* Don't bother memcmp if the state is already dirty */ \
520 /* Also if the new state is empty, avoid marking dirty */ \
521 if (!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_##bit) && \
522 new_pipeline->name.len != 0 && \
523 (old_pipeline == NULL || \
524 old_pipeline->name.len != new_pipeline->name.len || \
525 memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
526 &new_pipeline->batch_data[new_pipeline->name.offset], \
527 4 * new_pipeline->name.len) != 0)) \
528 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \
529 } while (0)
530 #define assert_identical(bit, name) \
531 do { \
532 /* Fixed states should always have matching sizes */ \
533 assert(old_pipeline == NULL || \
534 old_pipeline->name.len == new_pipeline->name.len); \
535 assert(old_pipeline == NULL || \
536 memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
537 &new_pipeline->batch_data[new_pipeline->name.offset], \
538 4 * new_pipeline->name.len) == 0); \
539 } while (0)
540 #define assert_empty(name) assert(new_pipeline->name.len == 0)
541
/* Compare all states, including partially packed ones; the dynamic part is
 * left at 0, but the static part could still change.
 */
545 diff_fix_state(URB, final.urb);
546 diff_fix_state(VF_SGVS, final.vf_sgvs);
547 if (cmd_buffer->device->info->ver >= 11)
548 diff_fix_state(VF_SGVS_2, final.vf_sgvs_2);
549 if (cmd_buffer->device->info->ver >= 12)
550 diff_fix_state(PRIMITIVE_REPLICATION, final.primitive_replication);
551 diff_fix_state(SBE, final.sbe);
552 diff_fix_state(SBE_SWIZ, final.sbe_swiz);
553 diff_fix_state(MULTISAMPLE, final.ms);
554 diff_fix_state(VS, final.vs);
555 diff_fix_state(HS, final.hs);
556 diff_fix_state(DS, final.ds);
557 diff_fix_state(PS, final.ps);
558
559 diff_fix_state(CLIP, partial.clip);
560 diff_fix_state(SF, partial.sf);
561 diff_fix_state(RASTER, partial.raster);
562 diff_fix_state(WM, partial.wm);
563 diff_fix_state(STREAMOUT, partial.so);
564 diff_fix_state(GS, partial.gs);
565 diff_fix_state(TE, partial.te);
566 diff_fix_state(VFG, partial.vfg);
567 diff_fix_state(PS_EXTRA, partial.ps_extra);
568
569 if (cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader) {
570 diff_fix_state(TASK_CONTROL, final.task_control);
571 diff_fix_state(TASK_SHADER, final.task_shader);
572 diff_fix_state(TASK_REDISTRIB, final.task_redistrib);
573 diff_fix_state(MESH_CONTROL, final.mesh_control);
574 diff_fix_state(MESH_SHADER, final.mesh_shader);
575 diff_fix_state(MESH_DISTRIB, final.mesh_distrib);
576 diff_fix_state(CLIP_MESH, final.clip_mesh);
577 diff_fix_state(SBE_MESH, final.sbe_mesh);
578 } else {
579 assert_empty(final.task_control);
580 assert_empty(final.task_shader);
581 assert_empty(final.task_redistrib);
582 assert_empty(final.mesh_control);
583 assert_empty(final.mesh_shader);
584 assert_empty(final.mesh_distrib);
585 assert_empty(final.clip_mesh);
586 assert_empty(final.sbe_mesh);
587 }
588
589 /* States that should never vary between pipelines, but can be affected by
590 * blorp etc...
591 */
592 assert_identical(VF_STATISTICS, final.vf_statistics);
593
594 /* States that can vary in length */
595 diff_var_state(VF_SGVS_INSTANCING, final.vf_sgvs_instancing);
596 diff_var_state(SO_DECL_LIST, final.so_decl_list);
597
598 #undef diff_fix_state
599 #undef diff_var_state
600 #undef assert_identical
601 #undef assert_empty
602
/* We're not diffing the following:
604 * - anv_graphics_pipeline::vertex_input_data
605 * - anv_graphics_pipeline::final::vf_instancing
606 *
607 * since they are tracked by the runtime.
608 */
609 }
610
void anv_CmdBindPipeline(
    VkCommandBuffer commandBuffer,
    VkPipelineBindPoint pipelineBindPoint,
    VkPipeline _pipeline)
615 {
616 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
617 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
618 struct anv_cmd_pipeline_state *state;
619 VkShaderStageFlags stages = 0;
620
621 switch (pipelineBindPoint) {
622 case VK_PIPELINE_BIND_POINT_COMPUTE: {
623 if (cmd_buffer->state.compute.base.pipeline == pipeline)
624 return;
625
626 cmd_buffer->state.compute.base.pipeline = pipeline;
627 cmd_buffer->state.compute.pipeline_dirty = true;
628
629 struct anv_compute_pipeline *compute_pipeline =
630 anv_pipeline_to_compute(pipeline);
631 set_dirty_for_bind_map(cmd_buffer, MESA_SHADER_COMPUTE,
632 &compute_pipeline->cs->bind_map);
633
634 state = &cmd_buffer->state.compute.base;
635 stages = VK_SHADER_STAGE_COMPUTE_BIT;
636 break;
637 }
638
639 case VK_PIPELINE_BIND_POINT_GRAPHICS: {
640 struct anv_graphics_pipeline *new_pipeline =
641 anv_pipeline_to_graphics(pipeline);
642
/* Apply the non-dynamic state from the pipeline */
644 vk_cmd_set_dynamic_graphics_state(&cmd_buffer->vk,
645 &new_pipeline->dynamic_state);
646
647 if (cmd_buffer->state.gfx.base.pipeline == pipeline)
648 return;
649
650 struct anv_graphics_pipeline *old_pipeline =
651 cmd_buffer->state.gfx.base.pipeline == NULL ? NULL :
652 anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
653
654 cmd_buffer->state.gfx.base.pipeline = pipeline;
655 cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE;
656
657 anv_foreach_stage(stage, new_pipeline->base.base.active_stages) {
658 set_dirty_for_bind_map(cmd_buffer, stage,
659 &new_pipeline->base.shaders[stage]->bind_map);
660 }
661
662 state = &cmd_buffer->state.gfx.base;
663 stages = new_pipeline->base.base.active_stages;
664
665
666 /* When the pipeline is using independent states and dynamic buffers,
667 * this will trigger an update of anv_push_constants::dynamic_base_index
668 * & anv_push_constants::dynamic_offsets.
669 */
670 struct anv_push_constants *push =
671 &cmd_buffer->state.gfx.base.push_constants;
672 struct anv_pipeline_sets_layout *layout = &new_pipeline->base.base.layout;
673 if (layout->independent_sets && layout->num_dynamic_buffers > 0) {
674 bool modified = false;
675 for (uint32_t s = 0; s < layout->num_sets; s++) {
676 if (layout->set[s].layout == NULL)
677 continue;
678
679 assert(layout->set[s].dynamic_offset_start < MAX_DYNAMIC_BUFFERS);
680 if (layout->set[s].layout->dynamic_offset_count > 0 &&
681 (push->desc_surface_offsets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) !=
682 layout->set[s].dynamic_offset_start) {
683 push->desc_surface_offsets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
684 push->desc_surface_offsets[s] |= (layout->set[s].dynamic_offset_start &
685 ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
686 modified = true;
687 }
688 }
689 if (modified)
690 cmd_buffer->state.push_constants_dirty |= stages;
691 }
692
693 if ((new_pipeline->fs_msaa_flags & INTEL_MSAA_FLAG_ENABLE_DYNAMIC) &&
694 push->gfx.fs_msaa_flags != new_pipeline->fs_msaa_flags) {
695 push->gfx.fs_msaa_flags = new_pipeline->fs_msaa_flags;
696 cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
697 }
698 if (new_pipeline->dynamic_patch_control_points) {
699 cmd_buffer->state.push_constants_dirty |=
700 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
701 }
702
703 anv_cmd_buffer_flush_pipeline_state(cmd_buffer, old_pipeline, new_pipeline);
704 break;
705 }
706
707 case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: {
708 if (cmd_buffer->state.rt.base.pipeline == pipeline)
709 return;
710
711 cmd_buffer->state.rt.base.pipeline = pipeline;
712 cmd_buffer->state.rt.pipeline_dirty = true;
713
714 struct anv_ray_tracing_pipeline *rt_pipeline =
715 anv_pipeline_to_ray_tracing(pipeline);
716 if (rt_pipeline->stack_size > 0) {
717 anv_CmdSetRayTracingPipelineStackSizeKHR(commandBuffer,
718 rt_pipeline->stack_size);
719 }
720
721 state = &cmd_buffer->state.rt.base;
722 break;
723 }
724
725 default:
726 unreachable("invalid bind point");
727 break;
728 }
729
730 if (pipeline->ray_queries > 0)
731 anv_cmd_buffer_set_ray_query_buffer(cmd_buffer, state, pipeline, stages);
732 }
733
734 static void
anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
                                   VkPipelineBindPoint bind_point,
                                   struct anv_pipeline_sets_layout *layout,
                                   uint32_t set_index,
                                   struct anv_descriptor_set *set,
                                   uint32_t *dynamic_offset_count,
                                   const uint32_t **dynamic_offsets)
742 {
/* Either we have no pool (because this is a push descriptor set) or the
 * pool is not host-only:
745 *
746 * VUID-vkCmdBindDescriptorSets-pDescriptorSets-04616:
747 *
748 * "Each element of pDescriptorSets must not have been allocated from a
749 * VkDescriptorPool with the
750 * VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT flag set"
751 */
752 assert(!set->pool || !set->pool->host_only);
753
754 struct anv_descriptor_set_layout *set_layout = set->layout;
755 VkShaderStageFlags stages = set_layout->shader_stages;
756 struct anv_cmd_pipeline_state *pipe_state;
757
758 switch (bind_point) {
759 case VK_PIPELINE_BIND_POINT_GRAPHICS:
760 stages &= VK_SHADER_STAGE_ALL_GRAPHICS |
761 (cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader ?
762 (VK_SHADER_STAGE_TASK_BIT_EXT |
763 VK_SHADER_STAGE_MESH_BIT_EXT) : 0);
764 pipe_state = &cmd_buffer->state.gfx.base;
765 break;
766
767 case VK_PIPELINE_BIND_POINT_COMPUTE:
768 stages &= VK_SHADER_STAGE_COMPUTE_BIT;
769 pipe_state = &cmd_buffer->state.compute.base;
770 break;
771
772 case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
773 stages &= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
774 VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
775 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
776 VK_SHADER_STAGE_MISS_BIT_KHR |
777 VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
778 VK_SHADER_STAGE_CALLABLE_BIT_KHR;
779 pipe_state = &cmd_buffer->state.rt.base;
780 break;
781
782 default:
783 unreachable("invalid bind point");
784 }
785
786 VkShaderStageFlags dirty_stages = 0;
/* If it's a push descriptor set, we have to flag things as dirty
 * regardless of whether or not the CPU-side data structure changed, as we
 * may have edited it in place.
 */
791 if (pipe_state->descriptors[set_index] != set ||
792 anv_descriptor_set_is_push(set)) {
793 pipe_state->descriptors[set_index] = set;
794
/* When using indirect descriptors, stages that have access to the HW
 * binding tables never need to access the
 * anv_push_constants::desc_surface_offsets fields, because any data
 * they need from the descriptor buffer is accessible through a binding
 * table entry. For stages that are "bindless" (Mesh/Task/RT), we need
 * to provide anv_push_constants::desc_surface_offsets matching the bound
 * descriptor so that shaders can access the descriptor buffer through
 * A64 messages.
 *
 * With direct descriptors, the shaders can use
 * anv_push_constants::desc_surface_offsets to build bindless offsets, so
 * we always need to update the push constant data.
 */
808 bool update_desc_sets =
809 !cmd_buffer->device->physical->indirect_descriptors ||
810 (stages & (VK_SHADER_STAGE_TASK_BIT_EXT |
811 VK_SHADER_STAGE_MESH_BIT_EXT |
812 VK_SHADER_STAGE_RAYGEN_BIT_KHR |
813 VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
814 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
815 VK_SHADER_STAGE_MISS_BIT_KHR |
816 VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
817 VK_SHADER_STAGE_CALLABLE_BIT_KHR));
818
819 if (update_desc_sets) {
820 struct anv_push_constants *push = &pipe_state->push_constants;
821
822 uint64_t offset =
823 anv_address_physical(set->desc_surface_addr) -
824 cmd_buffer->device->physical->va.internal_surface_state_pool.addr;
825 assert((offset & ~ANV_DESCRIPTOR_SET_OFFSET_MASK) == 0);
826 push->desc_surface_offsets[set_index] &= ~ANV_DESCRIPTOR_SET_OFFSET_MASK;
827 push->desc_surface_offsets[set_index] |= offset;
828 push->desc_sampler_offsets[set_index] |=
829 anv_address_physical(set->desc_sampler_addr) -
830 cmd_buffer->device->physical->va.dynamic_state_pool.addr;
831
832 anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
833 set->desc_surface_addr.bo);
834 anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
835 set->desc_sampler_addr.bo);
836 }
837
838 dirty_stages |= stages;
839 }
840
841 if (dynamic_offsets) {
842 if (set_layout->dynamic_offset_count > 0) {
843 struct anv_push_constants *push = &pipe_state->push_constants;
844 uint32_t dynamic_offset_start =
845 layout->set[set_index].dynamic_offset_start;
846 uint32_t *push_offsets =
847 &push->dynamic_offsets[dynamic_offset_start];
848
849 memcpy(pipe_state->dynamic_offsets[set_index].offsets,
850 *dynamic_offsets,
851 sizeof(uint32_t) * MIN2(*dynamic_offset_count,
852 set_layout->dynamic_offset_count));
853
854 /* Assert that everything is in range */
855 assert(set_layout->dynamic_offset_count <= *dynamic_offset_count);
856 assert(dynamic_offset_start + set_layout->dynamic_offset_count <=
857 ARRAY_SIZE(push->dynamic_offsets));
858
859 for (uint32_t i = 0; i < set_layout->dynamic_offset_count; i++) {
860 if (push_offsets[i] != (*dynamic_offsets)[i]) {
861 pipe_state->dynamic_offsets[set_index].offsets[i] =
862 push_offsets[i] = (*dynamic_offsets)[i];
863 /* dynamic_offset_stages[] elements could contain blanket
864 * values like VK_SHADER_STAGE_ALL, so limit this to the
865 * binding point's bits.
866 */
867 dirty_stages |= set_layout->dynamic_offset_stages[i] & stages;
868 }
869 }
870
871 *dynamic_offsets += set_layout->dynamic_offset_count;
872 *dynamic_offset_count -= set_layout->dynamic_offset_count;
873 }
874 }
875
876 if (set->is_push)
877 cmd_buffer->state.push_descriptors_dirty |= dirty_stages;
878 else
879 cmd_buffer->state.descriptors_dirty |= dirty_stages;
880 cmd_buffer->state.push_constants_dirty |= dirty_stages;
881 }
882
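/* Stage masks used to route the *2KHR entrypoints (which take a
 * VkShaderStageFlags) to the graphics, compute or ray tracing bind points.
 */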
883 #define ANV_GRAPHICS_STAGE_BITS \
884 (VK_SHADER_STAGE_ALL_GRAPHICS | \
885 VK_SHADER_STAGE_MESH_BIT_EXT | \
886 VK_SHADER_STAGE_TASK_BIT_EXT)
887
888 #define ANV_RT_STAGE_BITS \
889 (VK_SHADER_STAGE_RAYGEN_BIT_KHR | \
890 VK_SHADER_STAGE_ANY_HIT_BIT_KHR | \
891 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | \
892 VK_SHADER_STAGE_MISS_BIT_KHR | \
893 VK_SHADER_STAGE_INTERSECTION_BIT_KHR | \
894 VK_SHADER_STAGE_CALLABLE_BIT_KHR)
895
void anv_CmdBindDescriptorSets2KHR(
    VkCommandBuffer commandBuffer,
    const VkBindDescriptorSetsInfoKHR *pInfo)
899 {
900 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
901 ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pInfo->layout);
902 struct anv_pipeline_sets_layout *layout = &pipeline_layout->sets_layout;
903
904 assert(pInfo->firstSet + pInfo->descriptorSetCount <= MAX_SETS);
905
906 if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
907 uint32_t dynamicOffsetCount = pInfo->dynamicOffsetCount;
908 const uint32_t *pDynamicOffsets = pInfo->pDynamicOffsets;
909
910 for (uint32_t i = 0; i < pInfo->descriptorSetCount; i++) {
911 ANV_FROM_HANDLE(anv_descriptor_set, set, pInfo->pDescriptorSets[i]);
912 if (set == NULL)
913 continue;
914 anv_cmd_buffer_bind_descriptor_set(cmd_buffer,
915 VK_PIPELINE_BIND_POINT_COMPUTE,
916 layout, pInfo->firstSet + i, set,
917 &dynamicOffsetCount,
918 &pDynamicOffsets);
919 }
920 }
921 if (pInfo->stageFlags & ANV_GRAPHICS_STAGE_BITS) {
922 uint32_t dynamicOffsetCount = pInfo->dynamicOffsetCount;
923 const uint32_t *pDynamicOffsets = pInfo->pDynamicOffsets;
924
925 for (uint32_t i = 0; i < pInfo->descriptorSetCount; i++) {
926 ANV_FROM_HANDLE(anv_descriptor_set, set, pInfo->pDescriptorSets[i]);
927 if (set == NULL)
928 continue;
929 anv_cmd_buffer_bind_descriptor_set(cmd_buffer,
930 VK_PIPELINE_BIND_POINT_GRAPHICS,
931 layout, pInfo->firstSet + i, set,
932 &dynamicOffsetCount,
933 &pDynamicOffsets);
934 }
935 }
936 if (pInfo->stageFlags & ANV_RT_STAGE_BITS) {
937 uint32_t dynamicOffsetCount = pInfo->dynamicOffsetCount;
938 const uint32_t *pDynamicOffsets = pInfo->pDynamicOffsets;
939
940 for (uint32_t i = 0; i < pInfo->descriptorSetCount; i++) {
941 ANV_FROM_HANDLE(anv_descriptor_set, set, pInfo->pDescriptorSets[i]);
942 if (set == NULL)
943 continue;
944 anv_cmd_buffer_bind_descriptor_set(cmd_buffer,
945 VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR,
946 layout, pInfo->firstSet + i, set,
947 &dynamicOffsetCount,
948 &pDynamicOffsets);
949 }
950 }
951 }
952
void anv_CmdBindVertexBuffers2(
    VkCommandBuffer commandBuffer,
    uint32_t firstBinding,
    uint32_t bindingCount,
    const VkBuffer *pBuffers,
    const VkDeviceSize *pOffsets,
    const VkDeviceSize *pSizes,
    const VkDeviceSize *pStrides)
961 {
962 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
963 struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings;
964
/* We have to defer setting up vertex buffers since we need the buffer
 * stride from the pipeline. */
967
968 assert(firstBinding + bindingCount <= MAX_VBS);
969 for (uint32_t i = 0; i < bindingCount; i++) {
970 ANV_FROM_HANDLE(anv_buffer, buffer, pBuffers[i]);
971
972 if (buffer == NULL) {
973 vb[firstBinding + i] = (struct anv_vertex_binding) {
974 .buffer = NULL,
975 };
976 } else {
977 vb[firstBinding + i] = (struct anv_vertex_binding) {
978 .buffer = buffer,
979 .offset = pOffsets[i],
980 .size = vk_buffer_range(&buffer->vk, pOffsets[i],
981 pSizes ? pSizes[i] : VK_WHOLE_SIZE),
982 };
983 }
984 cmd_buffer->state.gfx.vb_dirty |= 1 << (firstBinding + i);
985 }
986
987 if (pStrides != NULL) {
988 vk_cmd_set_vertex_binding_strides(&cmd_buffer->vk, firstBinding,
989 bindingCount, pStrides);
990 }
991 }
992
void anv_CmdBindTransformFeedbackBuffersEXT(
    VkCommandBuffer commandBuffer,
    uint32_t firstBinding,
    uint32_t bindingCount,
    const VkBuffer *pBuffers,
    const VkDeviceSize *pOffsets,
    const VkDeviceSize *pSizes)
1000 {
1001 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1002 struct anv_xfb_binding *xfb = cmd_buffer->state.xfb_bindings;
1003
/* We have to defer setting up the buffers since we need the buffer
 * stride from the pipeline. */
1006
1007 assert(firstBinding + bindingCount <= MAX_XFB_BUFFERS);
1008 for (uint32_t i = 0; i < bindingCount; i++) {
1009 if (pBuffers[i] == VK_NULL_HANDLE) {
1010 xfb[firstBinding + i].buffer = NULL;
1011 } else {
1012 ANV_FROM_HANDLE(anv_buffer, buffer, pBuffers[i]);
1013 xfb[firstBinding + i].buffer = buffer;
1014 xfb[firstBinding + i].offset = pOffsets[i];
1015 xfb[firstBinding + i].size =
1016 vk_buffer_range(&buffer->vk, pOffsets[i],
1017 pSizes ? pSizes[i] : VK_WHOLE_SIZE);
1018 }
1019 }
1020 }
1021
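/* UBO accesses that go through the sampler need a typed RGBA32 format;
 * everything else is accessed as a raw buffer.
 */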
1022 enum isl_format
anv_isl_format_for_descriptor_type(const struct anv_device *device,
                                   VkDescriptorType type)
1025 {
1026 switch (type) {
1027 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1028 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1029 return device->physical->compiler->indirect_ubos_use_sampler ?
1030 ISL_FORMAT_R32G32B32A32_FLOAT : ISL_FORMAT_RAW;
1031
1032 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1033 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1034 return ISL_FORMAT_RAW;
1035
1036 default:
1037 unreachable("Invalid descriptor type");
1038 }
1039 }
1040
1041 struct anv_state
anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
                            const void *data, uint32_t size, uint32_t alignment)
1044 {
1045 struct anv_state state;
1046
1047 state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);
1048 memcpy(state.map, data, size);
1049
1050 VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size));
1051
1052 return state;
1053 }
1054
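/* OR together two pre-packed dword arrays (typically a pipeline's static
 * packing and a dynamically packed partial state) into freshly allocated
 * dynamic state. A sketch of typical usage, with 3DSTATE_FOO standing in
 * for a real packet name:
 *
 *    struct anv_state state =
 *       anv_cmd_buffer_merge_dynamic(cmd_buffer, pipeline_dws, dynamic_dws,
 *                                    GENX(3DSTATE_FOO_length), 32);
 */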
1055 struct anv_state
anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
                             uint32_t *a, uint32_t *b,
                             uint32_t dwords, uint32_t alignment)
1059 {
1060 struct anv_state state;
1061 uint32_t *p;
1062
1063 state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
1064 dwords * 4, alignment);
1065 p = state.map;
1066 for (uint32_t i = 0; i < dwords; i++)
1067 p[i] = a[i] | b[i];
1068
1069 VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
1070
1071 return state;
1072 }
1073
1074 struct anv_state
anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer)
1076 {
1077 struct anv_push_constants *data =
1078 &cmd_buffer->state.gfx.base.push_constants;
1079
1080 struct anv_state state =
1081 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
1082 sizeof(struct anv_push_constants),
1083 32 /* bottom 5 bits MBZ */);
1084 memcpy(state.map, data, sizeof(struct anv_push_constants));
1085
1086 return state;
1087 }
1088
1089 struct anv_state
anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
1091 {
1092 const struct intel_device_info *devinfo = cmd_buffer->device->info;
1093 struct anv_cmd_pipeline_state *pipe_state = &cmd_buffer->state.compute.base;
1094 struct anv_push_constants *data = &pipe_state->push_constants;
1095 struct anv_compute_pipeline *pipeline =
1096 anv_pipeline_to_compute(cmd_buffer->state.compute.base.pipeline);
1097 const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
1098 const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0];
1099
1100 const struct intel_cs_dispatch_info dispatch =
1101 brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
1102 const unsigned total_push_constants_size =
1103 brw_cs_push_const_total_size(cs_prog_data, dispatch.threads);
1104 if (total_push_constants_size == 0)
1105 return (struct anv_state) { .offset = 0 };
1106
1107 const unsigned push_constant_alignment = 64;
1108 const unsigned aligned_total_push_constants_size =
1109 ALIGN(total_push_constants_size, push_constant_alignment);
1110 struct anv_state state;
1111 if (devinfo->verx10 >= 125) {
1112 state = anv_state_stream_alloc(&cmd_buffer->general_state_stream,
1113 aligned_total_push_constants_size,
1114 push_constant_alignment);
1115 } else {
1116 state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
1117 aligned_total_push_constants_size,
1118 push_constant_alignment);
1119 }
1120 if (state.map == NULL)
1121 return state;
1122
1123 void *dst = state.map;
1124 const void *src = (char *)data + (range->start * 32);
1125
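/* The compute push constant data is laid out as a single cross-thread block
 * followed by one per-thread block per dispatched thread, with the subgroup
 * ID patched into each per-thread copy.
 */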
1126 if (cs_prog_data->push.cross_thread.size > 0) {
1127 memcpy(dst, src, cs_prog_data->push.cross_thread.size);
1128 dst += cs_prog_data->push.cross_thread.size;
1129 src += cs_prog_data->push.cross_thread.size;
1130 }
1131
1132 if (cs_prog_data->push.per_thread.size > 0) {
1133 for (unsigned t = 0; t < dispatch.threads; t++) {
1134 memcpy(dst, src, cs_prog_data->push.per_thread.size);
1135
1136 uint32_t *subgroup_id = dst +
1137 offsetof(struct anv_push_constants, cs.subgroup_id) -
1138 (range->start * 32 + cs_prog_data->push.cross_thread.size);
1139 *subgroup_id = t;
1140
1141 dst += cs_prog_data->push.per_thread.size;
1142 }
1143 }
1144
1145 return state;
1146 }
1147
void anv_CmdPushConstants2KHR(
    VkCommandBuffer commandBuffer,
    const VkPushConstantsInfoKHR *pInfo)
1151 {
1152 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1153
1154 if (pInfo->stageFlags & ANV_GRAPHICS_STAGE_BITS) {
1155 struct anv_cmd_pipeline_state *pipe_state =
1156 &cmd_buffer->state.gfx.base;
1157
1158 memcpy(pipe_state->push_constants.client_data + pInfo->offset,
1159 pInfo->pValues, pInfo->size);
1160 }
1161 if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
1162 struct anv_cmd_pipeline_state *pipe_state =
1163 &cmd_buffer->state.compute.base;
1164
1165 memcpy(pipe_state->push_constants.client_data + pInfo->offset,
1166 pInfo->pValues, pInfo->size);
1167 }
1168 if (pInfo->stageFlags & ANV_RT_STAGE_BITS) {
1169 struct anv_cmd_pipeline_state *pipe_state =
1170 &cmd_buffer->state.rt.base;
1171
1172 memcpy(pipe_state->push_constants.client_data + pInfo->offset,
1173 pInfo->pValues, pInfo->size);
1174 }
1175
1176 cmd_buffer->state.push_constants_dirty |= pInfo->stageFlags;
1177 }
1178
1179 static struct anv_cmd_pipeline_state *
anv_cmd_buffer_get_pipe_state(struct anv_cmd_buffer *cmd_buffer,
                              VkPipelineBindPoint bind_point)
1182 {
1183 switch (bind_point) {
1184 case VK_PIPELINE_BIND_POINT_GRAPHICS:
1185 return &cmd_buffer->state.gfx.base;
1186 case VK_PIPELINE_BIND_POINT_COMPUTE:
1187 return &cmd_buffer->state.compute.base;
1188 case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
1189 return &cmd_buffer->state.rt.base;
1191 default:
1192 unreachable("invalid bind point");
1193 }
1194 }
1195
1196 static void
anv_cmd_buffer_push_descriptor_sets(struct anv_cmd_buffer *cmd_buffer,
                                    VkPipelineBindPoint bind_point,
                                    const VkPushDescriptorSetInfoKHR *pInfo)
1200 {
1201 ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pInfo->layout);
1202 struct anv_pipeline_sets_layout *layout = &pipeline_layout->sets_layout;
1203
1204 assert(pInfo->set < MAX_SETS);
1205
1206 struct anv_descriptor_set_layout *set_layout = layout->set[pInfo->set].layout;
1207
1208 struct anv_push_descriptor_set *push_set =
1209 &anv_cmd_buffer_get_pipe_state(cmd_buffer,
1210 bind_point)->push_descriptor;
1211 if (!anv_push_descriptor_set_init(cmd_buffer, push_set, set_layout))
1212 return;
1213
1214 anv_descriptor_set_write(cmd_buffer->device, &push_set->set,
1215 pInfo->descriptorWriteCount,
1216 pInfo->pDescriptorWrites);
1217
1218 anv_cmd_buffer_bind_descriptor_set(cmd_buffer, bind_point,
1219 layout, pInfo->set, &push_set->set,
1220 NULL, NULL);
1221 }
1222
void anv_CmdPushDescriptorSet2KHR(
    VkCommandBuffer commandBuffer,
    const VkPushDescriptorSetInfoKHR *pInfo)
1226 {
1227 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1228
1229 if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
1230 anv_cmd_buffer_push_descriptor_sets(cmd_buffer,
1231 VK_PIPELINE_BIND_POINT_COMPUTE,
1232 pInfo);
1233 if (pInfo->stageFlags & ANV_GRAPHICS_STAGE_BITS)
1234 anv_cmd_buffer_push_descriptor_sets(cmd_buffer,
1235 VK_PIPELINE_BIND_POINT_GRAPHICS,
1236 pInfo);
1237 if (pInfo->stageFlags & ANV_RT_STAGE_BITS)
1238 anv_cmd_buffer_push_descriptor_sets(cmd_buffer,
1239 VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR,
1240 pInfo);
1241 }
1242
void anv_CmdPushDescriptorSetWithTemplate2KHR(
    VkCommandBuffer commandBuffer,
    const VkPushDescriptorSetWithTemplateInfoKHR *pInfo)
1246 {
1247 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1248 VK_FROM_HANDLE(vk_descriptor_update_template, template,
1249 pInfo->descriptorUpdateTemplate);
1250 ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pInfo->layout);
1251 struct anv_pipeline_sets_layout *layout = &pipeline_layout->sets_layout;
1252
1253 assert(pInfo->set < MAX_PUSH_DESCRIPTORS);
1254
1255 struct anv_descriptor_set_layout *set_layout = layout->set[pInfo->set].layout;
1256
1257 struct anv_push_descriptor_set *push_set =
1258 &anv_cmd_buffer_get_pipe_state(cmd_buffer,
1259 template->bind_point)->push_descriptor;
1260 if (!anv_push_descriptor_set_init(cmd_buffer, push_set, set_layout))
1261 return;
1262
1263 anv_descriptor_set_write_template(cmd_buffer->device, &push_set->set,
1264 template,
1265 pInfo->pData);
1266
1267 anv_cmd_buffer_bind_descriptor_set(cmd_buffer, template->bind_point,
1268 layout, pInfo->set, &push_set->set,
1269 NULL, NULL);
1270 }
1271
void anv_CmdSetRayTracingPipelineStackSizeKHR(
    VkCommandBuffer commandBuffer,
    uint32_t pipelineStackSize)
1275 {
1276 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1277 struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;
1278 struct anv_device *device = cmd_buffer->device;
1279
1280 if (anv_batch_has_error(&cmd_buffer->batch))
1281 return;
1282
1283 uint32_t stack_ids_per_dss = 2048; /* TODO */
1284
1285 unsigned stack_size_log2 = util_logbase2_ceil(pipelineStackSize);
1286 if (stack_size_log2 < 10)
1287 stack_size_log2 = 10;
1288
1289 if (rt->scratch.layout.total_size == 1 << stack_size_log2)
1290 return;
1291
1292 brw_rt_compute_scratch_layout(&rt->scratch.layout, device->info,
1293 stack_ids_per_dss, 1 << stack_size_log2);
1294
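/* RT scratch BOs are cached on the device in power-of-two buckets, the
 * smallest stack size being 1KB (1 << 10).
 */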
1295 unsigned bucket = stack_size_log2 - 10;
1296 assert(bucket < ARRAY_SIZE(device->rt_scratch_bos));
1297
1298 struct anv_bo *bo = p_atomic_read(&device->rt_scratch_bos[bucket]);
1299 if (bo == NULL) {
1300 struct anv_bo *new_bo;
1301 VkResult result = anv_device_alloc_bo(device, "RT scratch",
1302 rt->scratch.layout.total_size,
1303 0, /* alloc_flags */
1304 0, /* explicit_address */
1305 &new_bo);
1306 if (result != VK_SUCCESS) {
1307 rt->scratch.layout.total_size = 0;
1308 anv_batch_set_error(&cmd_buffer->batch, result);
1309 return;
1310 }
1311
1312 bo = p_atomic_cmpxchg(&device->rt_scratch_bos[bucket], NULL, new_bo);
1313 if (bo != NULL) {
anv_device_release_bo(device, new_bo);
1315 } else {
1316 bo = new_bo;
1317 }
1318 }
1319
1320 rt->scratch.bo = bo;
1321 }
1322
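/* Save and later restore a small amount of compute bind-point state
 * (pipeline, descriptor set 0, push constants), presumably for internally
 * emitted dispatches that need to temporarily take over that state.
 */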
1323 void
anv_cmd_buffer_save_state(struct anv_cmd_buffer *cmd_buffer,
                          uint32_t flags,
                          struct anv_cmd_saved_state *state)
1327 {
1328 state->flags = flags;
1329
1330 /* we only support the compute pipeline at the moment */
1331 assert(state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE);
1332 const struct anv_cmd_pipeline_state *pipe_state =
1333 &cmd_buffer->state.compute.base;
1334
1335 if (state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE)
1336 state->pipeline = pipe_state->pipeline;
1337
1338 if (state->flags & ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0)
1339 state->descriptor_set = pipe_state->descriptors[0];
1340
1341 if (state->flags & ANV_CMD_SAVED_STATE_PUSH_CONSTANTS) {
1342 memcpy(state->push_constants, pipe_state->push_constants.client_data,
1343 sizeof(state->push_constants));
1344 }
1345 }
1346
1347 void
anv_cmd_buffer_restore_state(struct anv_cmd_buffer *cmd_buffer,
                             struct anv_cmd_saved_state *state)
1350 {
1351 VkCommandBuffer cmd_buffer_ = anv_cmd_buffer_to_handle(cmd_buffer);
1352
1353 assert(state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE);
1354 const VkPipelineBindPoint bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
1355 const VkShaderStageFlags stage_flags = VK_SHADER_STAGE_COMPUTE_BIT;
1356 struct anv_cmd_pipeline_state *pipe_state = &cmd_buffer->state.compute.base;
1357
1358 if (state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE) {
1359 if (state->pipeline) {
1360 anv_CmdBindPipeline(cmd_buffer_, bind_point,
1361 anv_pipeline_to_handle(state->pipeline));
1362 } else {
1363 pipe_state->pipeline = NULL;
1364 }
1365 }
1366
1367 if (state->flags & ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0) {
1368 if (state->descriptor_set) {
1369 anv_cmd_buffer_bind_descriptor_set(cmd_buffer, bind_point, NULL, 0,
1370 state->descriptor_set, NULL, NULL);
1371 } else {
1372 pipe_state->descriptors[0] = NULL;
1373 }
1374 }
1375
1376 if (state->flags & ANV_CMD_SAVED_STATE_PUSH_CONSTANTS) {
1377 VkPushConstantsInfoKHR push_info = {
1378 .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
1379 .layout = VK_NULL_HANDLE,
1380 .stageFlags = stage_flags,
1381 .offset = 0,
1382 .size = sizeof(state->push_constants),
1383 .pValues = state->push_constants,
1384 };
1385 anv_CmdPushConstants2KHR(cmd_buffer_, &push_info);
1386 }
1387 }
1388