1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 
30 #include "anv_private.h"
31 #include "anv_measure.h"
32 
33 #include "vk_util.h"
34 
35 /** \file anv_cmd_buffer.c
36  *
37  * This file contains all of the stuff for emitting commands into a command
38  * buffer.  This includes implementations of most of the vkCmd*
39  * entrypoints.  This file is concerned entirely with state emission and
40  * not with the command buffer data structure itself.  As far as this file
41  * is concerned, most of anv_cmd_buffer is magic.
42  */
43 
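/* Zero the per-command-buffer state and set the defaults: an invalid current
 * pipeline (UINT32_MAX), an all-ones restart index, object-level preemption
 * enabled, and the device's default dynamic-state dirty bits so the first
 * flush re-emits them.
 */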
44 static void
45 anv_cmd_state_init(struct anv_cmd_buffer *cmd_buffer)
46 {
47    struct anv_cmd_state *state = &cmd_buffer->state;
48 
49    memset(state, 0, sizeof(*state));
50 
51    state->current_pipeline = UINT32_MAX;
52    state->gfx.restart_index = UINT32_MAX;
53    state->gfx.object_preemption = true;
54    state->gfx.dirty = 0;
55 
56    memcpy(state->gfx.dyn_state.dirty,
57           cmd_buffer->device->gfx_dirty_state,
58           sizeof(state->gfx.dyn_state.dirty));
59 }
60 
61 static void
62 anv_cmd_pipeline_state_finish(struct anv_cmd_buffer *cmd_buffer,
63                               struct anv_cmd_pipeline_state *pipe_state)
64 {
65    anv_push_descriptor_set_finish(&pipe_state->push_descriptor);
66 }
67 
68 static void
69 anv_cmd_state_finish(struct anv_cmd_buffer *cmd_buffer)
70 {
71    struct anv_cmd_state *state = &cmd_buffer->state;
72 
73    anv_cmd_pipeline_state_finish(cmd_buffer, &state->gfx.base);
74    anv_cmd_pipeline_state_finish(cmd_buffer, &state->compute.base);
75 }
76 
77 static void
78 anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer)
79 {
80    anv_cmd_state_finish(cmd_buffer);
81    anv_cmd_state_init(cmd_buffer);
82 }
83 
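/* Lazily create the companion RCS command buffer from the device-wide
 * companion command pool (under the device mutex) and begin recording it.
 * Presumably this is used when a command buffer recorded on a non-render
 * engine still needs work emitted on the render engine.
 */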
84 VkResult
85 anv_cmd_buffer_ensure_rcs_companion(struct anv_cmd_buffer *cmd_buffer)
86 {
87    if (cmd_buffer->companion_rcs_cmd_buffer)
88       return VK_SUCCESS;
89 
90    VkResult result = VK_SUCCESS;
91    pthread_mutex_lock(&cmd_buffer->device->mutex);
92    VK_FROM_HANDLE(vk_command_pool, pool,
93                   cmd_buffer->device->companion_rcs_cmd_pool);
94    assert(pool != NULL);
95 
96    struct vk_command_buffer *tmp_cmd_buffer = NULL;
97    result = pool->command_buffer_ops->create(pool, cmd_buffer->vk.level, &tmp_cmd_buffer);
98 
99    if (result != VK_SUCCESS)
100       goto unlock_and_return;
101 
102    cmd_buffer->companion_rcs_cmd_buffer =
103       container_of(tmp_cmd_buffer, struct anv_cmd_buffer, vk);
104    anv_genX(cmd_buffer->device->info, cmd_buffer_begin_companion)(
105       cmd_buffer->companion_rcs_cmd_buffer, cmd_buffer->vk.level);
106 
107 unlock_and_return:
108    pthread_mutex_unlock(&cmd_buffer->device->mutex);
109    return result;
110 }
111 
112 static VkResult
113 anv_create_cmd_buffer(struct vk_command_pool *pool,
114                       VkCommandBufferLevel level,
115                       struct vk_command_buffer **cmd_buffer_out)
116 {
117    struct anv_device *device =
118       container_of(pool->base.device, struct anv_device, vk);
119    struct anv_cmd_buffer *cmd_buffer;
120    VkResult result;
121 
122    cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8,
123                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
124    if (cmd_buffer == NULL)
125       return vk_error(pool, VK_ERROR_OUT_OF_HOST_MEMORY);
126 
127    result = vk_command_buffer_init(pool, &cmd_buffer->vk,
128                                    &anv_cmd_buffer_ops, level);
129    if (result != VK_SUCCESS)
130       goto fail_alloc;
131 
132    cmd_buffer->vk.dynamic_graphics_state.ms.sample_locations =
133       &cmd_buffer->state.gfx.sample_locations;
134    cmd_buffer->vk.dynamic_graphics_state.vi =
135       &cmd_buffer->state.gfx.vertex_input;
136 
137    cmd_buffer->batch.status = VK_SUCCESS;
138    cmd_buffer->generation.batch.status = VK_SUCCESS;
139 
140    cmd_buffer->device = device;
141 
142    assert(pool->queue_family_index < device->physical->queue.family_count);
143    cmd_buffer->queue_family =
144       &device->physical->queue.families[pool->queue_family_index];
145 
146    result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer);
147    if (result != VK_SUCCESS)
148       goto fail_vk;
149 
150    anv_state_stream_init(&cmd_buffer->surface_state_stream,
151                          &device->internal_surface_state_pool, 4096);
152    anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
153                          &device->dynamic_state_pool, 16384);
154    anv_state_stream_init(&cmd_buffer->general_state_stream,
155                          &device->general_state_pool, 16384);
156    anv_state_stream_init(&cmd_buffer->indirect_push_descriptor_stream,
157                          &device->indirect_push_descriptor_pool, 4096);
158    anv_state_stream_init(&cmd_buffer->push_descriptor_buffer_stream,
159                          &device->push_descriptor_buffer_pool, 4096);
160 
161    int success = u_vector_init_pow2(&cmd_buffer->dynamic_bos, 8,
162                                     sizeof(struct anv_bo *));
163    if (!success)
164       goto fail_batch_bo;
165 
166    cmd_buffer->self_mod_locations = NULL;
167    cmd_buffer->companion_rcs_cmd_buffer = NULL;
168    cmd_buffer->is_companion_rcs_cmd_buffer = false;
169 
170    cmd_buffer->generation.jump_addr = ANV_NULL_ADDRESS;
171    cmd_buffer->generation.return_addr = ANV_NULL_ADDRESS;
172 
173    memset(&cmd_buffer->generation.shader_state, 0,
174           sizeof(cmd_buffer->generation.shader_state));
175 
176    anv_cmd_state_init(cmd_buffer);
177 
178    anv_measure_init(cmd_buffer);
179 
180    u_trace_init(&cmd_buffer->trace, &device->ds.trace_context);
181 
182    *cmd_buffer_out = &cmd_buffer->vk;
183 
184    return VK_SUCCESS;
185 
186  fail_batch_bo:
187    anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);
188  fail_vk:
189    vk_command_buffer_finish(&cmd_buffer->vk);
190  fail_alloc:
191    vk_free2(&device->vk.alloc, &pool->alloc, cmd_buffer);
192 
193    return result;
194 }
195 
196 static void
197 destroy_cmd_buffer(struct anv_cmd_buffer *cmd_buffer)
198 {
199    u_trace_fini(&cmd_buffer->trace);
200 
201    anv_measure_destroy(cmd_buffer);
202 
203    anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);
204 
205    anv_state_stream_finish(&cmd_buffer->surface_state_stream);
206    anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
207    anv_state_stream_finish(&cmd_buffer->general_state_stream);
208    anv_state_stream_finish(&cmd_buffer->indirect_push_descriptor_stream);
209    anv_state_stream_finish(&cmd_buffer->push_descriptor_buffer_stream);
210 
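   /* Return dynamic BOs to their pool of origin: BOs with a CPU mapping came
    * from the batch BO pool, unmapped ones from the BVH BO pool.
    */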
211    while (u_vector_length(&cmd_buffer->dynamic_bos) > 0) {
212       struct anv_bo **bo = u_vector_remove(&cmd_buffer->dynamic_bos);
213       anv_bo_pool_free((*bo)->map != NULL ?
214                        &cmd_buffer->device->batch_bo_pool :
215                        &cmd_buffer->device->bvh_bo_pool, *bo);
216    }
217    u_vector_finish(&cmd_buffer->dynamic_bos);
218 
219    anv_cmd_state_finish(cmd_buffer);
220 
221    vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer->self_mod_locations);
222 
223    vk_command_buffer_finish(&cmd_buffer->vk);
224    vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer);
225 }
226 
227 static void
228 anv_cmd_buffer_destroy(struct vk_command_buffer *vk_cmd_buffer)
229 {
230    struct anv_cmd_buffer *cmd_buffer =
231       container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
232    struct anv_device *device = cmd_buffer->device;
233 
234    pthread_mutex_lock(&device->mutex);
235    if (cmd_buffer->companion_rcs_cmd_buffer) {
236       destroy_cmd_buffer(cmd_buffer->companion_rcs_cmd_buffer);
237       cmd_buffer->companion_rcs_cmd_buffer = NULL;
238    }
239 
240    ANV_RMV(cmd_buffer_destroy, cmd_buffer->device, cmd_buffer);
241 
242    destroy_cmd_buffer(cmd_buffer);
243    pthread_mutex_unlock(&device->mutex);
244 }
245 
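/* Shared reset path: clears the usage flags and cached state, rewinds the
 * batch BO chain, re-creates every state stream, releases the dynamic BOs
 * and resets measurement/tracing.
 */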
246 static void
247 reset_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
248                  UNUSED VkCommandBufferResetFlags flags)
249 {
250    vk_command_buffer_reset(&cmd_buffer->vk);
251 
252    cmd_buffer->usage_flags = 0;
253    cmd_buffer->perf_query_pool = NULL;
254    cmd_buffer->is_companion_rcs_cmd_buffer = false;
255    anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer);
256    anv_cmd_state_reset(cmd_buffer);
257 
258    memset(&cmd_buffer->generation.shader_state, 0,
259           sizeof(cmd_buffer->generation.shader_state));
260 
261    cmd_buffer->generation.jump_addr = ANV_NULL_ADDRESS;
262    cmd_buffer->generation.return_addr = ANV_NULL_ADDRESS;
263 
264    anv_state_stream_finish(&cmd_buffer->surface_state_stream);
265    anv_state_stream_init(&cmd_buffer->surface_state_stream,
266                          &cmd_buffer->device->internal_surface_state_pool, 4096);
267 
268    anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
269    anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
270                          &cmd_buffer->device->dynamic_state_pool, 16384);
271 
272    anv_state_stream_finish(&cmd_buffer->general_state_stream);
273    anv_state_stream_init(&cmd_buffer->general_state_stream,
274                          &cmd_buffer->device->general_state_pool, 16384);
275 
276    anv_state_stream_finish(&cmd_buffer->indirect_push_descriptor_stream);
277    anv_state_stream_init(&cmd_buffer->indirect_push_descriptor_stream,
278                          &cmd_buffer->device->indirect_push_descriptor_pool,
279                          4096);
280 
281    anv_state_stream_finish(&cmd_buffer->push_descriptor_buffer_stream);
282    anv_state_stream_init(&cmd_buffer->push_descriptor_buffer_stream,
283                          &cmd_buffer->device->push_descriptor_buffer_pool, 4096);
284 
285    while (u_vector_length(&cmd_buffer->dynamic_bos) > 0) {
286       struct anv_bo **bo = u_vector_remove(&cmd_buffer->dynamic_bos);
287       anv_device_release_bo(cmd_buffer->device, *bo);
288    }
289 
290    anv_measure_reset(cmd_buffer);
291 
292    u_trace_fini(&cmd_buffer->trace);
293    u_trace_init(&cmd_buffer->trace, &cmd_buffer->device->ds.trace_context);
294 }
295 
296 void
297 anv_cmd_buffer_reset(struct vk_command_buffer *vk_cmd_buffer,
298                      UNUSED VkCommandBufferResetFlags flags)
299 {
300    struct anv_cmd_buffer *cmd_buffer =
301       container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
302 
303    if (cmd_buffer->companion_rcs_cmd_buffer) {
304       reset_cmd_buffer(cmd_buffer->companion_rcs_cmd_buffer, flags);
305       destroy_cmd_buffer(cmd_buffer->companion_rcs_cmd_buffer);
306       cmd_buffer->companion_rcs_cmd_buffer = NULL;
307    }
308 
309    ANV_RMV(cmd_buffer_destroy, cmd_buffer->device, cmd_buffer);
310 
311    reset_cmd_buffer(cmd_buffer, flags);
312 }
313 
314 const struct vk_command_buffer_ops anv_cmd_buffer_ops = {
315    .create = anv_create_cmd_buffer,
316    .reset = anv_cmd_buffer_reset,
317    .destroy = anv_cmd_buffer_destroy,
318 };
319 
320 void
321 anv_cmd_buffer_emit_bt_pool_base_address(struct anv_cmd_buffer *cmd_buffer)
322 {
323    const struct intel_device_info *devinfo = cmd_buffer->device->info;
324    anv_genX(devinfo, cmd_buffer_emit_bt_pool_base_address)(cmd_buffer);
325 }
326 
327 void
328 anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
329                                   const struct anv_image *image,
330                                   VkImageAspectFlagBits aspect,
331                                   enum isl_aux_usage aux_usage,
332                                   uint32_t level,
333                                   uint32_t base_layer,
334                                   uint32_t layer_count)
335 {
336    const struct intel_device_info *devinfo = cmd_buffer->device->info;
337    anv_genX(devinfo, cmd_buffer_mark_image_written)(cmd_buffer, image,
338                                                     aspect, aux_usage,
339                                                     level, base_layer,
340                                                     layer_count);
341 }
342 
343 void
344 anv_cmd_buffer_mark_image_fast_cleared(struct anv_cmd_buffer *cmd_buffer,
345                                        const struct anv_image *image,
346                                        const enum isl_format format,
347                                        const struct isl_swizzle swizzle,
348                                        union isl_color_value clear_color)
349 {
350    const struct intel_device_info *devinfo = cmd_buffer->device->info;
351    anv_genX(devinfo, set_fast_clear_state)(cmd_buffer, image, format, swizzle,
352                                            clear_color);
353 }
354 
355 void
356 anv_cmd_buffer_load_clear_color(struct anv_cmd_buffer *cmd_buffer,
357                                 struct anv_state state,
358                                 const struct anv_image_view *iview)
359 {
360    const struct intel_device_info *devinfo = cmd_buffer->device->info;
361    anv_genX(devinfo, cmd_buffer_load_clear_color)(cmd_buffer, state, iview);
362 }
363 
364 void
365 anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer)
366 {
367    const struct intel_device_info *devinfo = cmd_buffer->device->info;
368    anv_genX(devinfo, cmd_emit_conditional_render_predicate)(cmd_buffer);
369 }
370 
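/* Drop the pending query write bits whose flush requirement is satisfied by
 * the pipe bits that were just flushed.
 */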
371 static void
372 clear_pending_query_bits(enum anv_query_bits *query_bits,
373                          enum anv_pipe_bits flushed_bits)
374 {
375    if (flushed_bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
376       *query_bits &= ~ANV_QUERY_WRITES_RT_FLUSH;
377 
378    if (flushed_bits & ANV_PIPE_TILE_CACHE_FLUSH_BIT)
379       *query_bits &= ~ANV_QUERY_WRITES_TILE_FLUSH;
380 
381    if ((flushed_bits & ANV_PIPE_DATA_CACHE_FLUSH_BIT) &&
382        (flushed_bits & ANV_PIPE_HDC_PIPELINE_FLUSH_BIT) &&
383        (flushed_bits & ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT))
384       *query_bits &= ~ANV_QUERY_WRITES_TILE_FLUSH;
385 
386    /* Once RT/TILE/DATA writes are no longer pending, a CS stall or end-of-pipe sync clears the CS_STALL requirement */
387    if ((*query_bits & (ANV_QUERY_WRITES_TILE_FLUSH |
388                        ANV_QUERY_WRITES_RT_FLUSH |
389                        ANV_QUERY_WRITES_DATA_FLUSH)) == 0 &&
390        (flushed_bits & (ANV_PIPE_END_OF_PIPE_SYNC_BIT | ANV_PIPE_CS_STALL_BIT)))
391       *query_bits &= ~ANV_QUERY_WRITES_CS_STALL;
392 }
393 
394 void
395 anv_cmd_buffer_update_pending_query_bits(struct anv_cmd_buffer *cmd_buffer,
396                                          enum anv_pipe_bits flushed_bits)
397 {
398    clear_pending_query_bits(&cmd_buffer->state.queries.clear_bits, flushed_bits);
399    clear_pending_query_bits(&cmd_buffer->state.queries.buffer_write_bits, flushed_bits);
400 }
401 
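/* Copy src into dst only when the contents differ; returns true if an update
 * actually happened.
 */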
402 static bool
403 mem_update(void *dst, const void *src, size_t size)
404 {
405    if (memcmp(dst, src, size) == 0)
406       return false;
407 
408    memcpy(dst, src, size);
409    return true;
410 }
411 
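/* Compare the pipeline's bind map SHA1s against the ones cached per stage in
 * the command buffer and flag descriptors/push constants dirty on mismatch.
 */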
412 static void
413 set_dirty_for_bind_map(struct anv_cmd_buffer *cmd_buffer,
414                        gl_shader_stage stage,
415                        const struct anv_pipeline_bind_map *map)
416 {
417    assert(stage < ARRAY_SIZE(cmd_buffer->state.surface_sha1s));
418    if (mem_update(cmd_buffer->state.surface_sha1s[stage],
419                   map->surface_sha1, sizeof(map->surface_sha1)))
420       cmd_buffer->state.descriptors_dirty |= mesa_to_vk_shader_stage(stage);
421 
422    assert(stage < ARRAY_SIZE(cmd_buffer->state.sampler_sha1s));
423    if (mem_update(cmd_buffer->state.sampler_sha1s[stage],
424                   map->sampler_sha1, sizeof(map->sampler_sha1)))
425       cmd_buffer->state.descriptors_dirty |= mesa_to_vk_shader_stage(stage);
426 
427    assert(stage < ARRAY_SIZE(cmd_buffer->state.push_sha1s));
428    if (mem_update(cmd_buffer->state.push_sha1s[stage],
429                   map->push_sha1, sizeof(map->push_sha1)))
430       cmd_buffer->state.push_constants_dirty |= mesa_to_vk_shader_stage(stage);
431 }
432 
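/* Make sure a large enough ray-query shadow stack BO is available (shared
 * through a per-device power-of-two bucket cache, with racing allocations
 * resolved by a compare-and-swap), reference the BOs from the batch, and
 * point the push constants at the ray query globals.
 */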
433 static void
434 anv_cmd_buffer_set_ray_query_buffer(struct anv_cmd_buffer *cmd_buffer,
435                                     struct anv_cmd_pipeline_state *pipeline_state,
436                                     struct anv_pipeline *pipeline,
437                                     VkShaderStageFlags stages)
438 {
439    struct anv_device *device = cmd_buffer->device;
440    uint8_t idx = anv_get_ray_query_bo_index(cmd_buffer);
441 
442    uint64_t ray_shadow_size =
443       align64(brw_rt_ray_queries_shadow_stacks_size(device->info,
444                                                     pipeline->ray_queries),
445               4096);
446    if (ray_shadow_size > 0 &&
447        (!cmd_buffer->state.ray_query_shadow_bo ||
448         cmd_buffer->state.ray_query_shadow_bo->size < ray_shadow_size)) {
449       unsigned shadow_size_log2 = MAX2(util_logbase2_ceil(ray_shadow_size), 16);
450       unsigned bucket = shadow_size_log2 - 16;
451       assert(bucket < ARRAY_SIZE(device->ray_query_shadow_bos[0]));
452 
453       struct anv_bo *bo = p_atomic_read(&device->ray_query_shadow_bos[idx][bucket]);
454       if (bo == NULL) {
455          struct anv_bo *new_bo;
456          VkResult result = anv_device_alloc_bo(device, "RT queries shadow",
457                                                ray_shadow_size,
458                                                ANV_BO_ALLOC_INTERNAL, /* alloc_flags */
459                                                0, /* explicit_address */
460                                                &new_bo);
461          if (result != VK_SUCCESS) {
462             anv_batch_set_error(&cmd_buffer->batch, result);
463             return;
464          }
465 
466          bo = p_atomic_cmpxchg(&device->ray_query_shadow_bos[idx][bucket], NULL, new_bo);
467          if (bo != NULL) {
468             anv_device_release_bo(device, new_bo);
469          } else {
470             bo = new_bo;
471          }
472       }
473       cmd_buffer->state.ray_query_shadow_bo = bo;
474 
475       /* Add the ray query buffers to the batch list. */
476       anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
477                             cmd_buffer->state.ray_query_shadow_bo);
478    }
479 
480    /* Add the HW buffer to the list of BOs used. */
481    assert(device->ray_query_bo[idx]);
482    anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
483                          device->ray_query_bo[idx]);
484 
485    /* Fill the push constants & mark them dirty. */
486    struct anv_address ray_query_globals_addr =
487       anv_genX(device->info, cmd_buffer_ray_query_globals)(cmd_buffer);
488    pipeline_state->push_constants.ray_query_globals =
489       anv_address_physical(ray_query_globals_addr);
490    cmd_buffer->state.push_constants_dirty |= stages;
491    pipeline_state->push_constants_data_dirty = true;
492 }
493 
494 /**
495  * This function computes the changes between two pipelines and flags the dirty HW
496  * state appropriately.
497  */
498 static void
499 anv_cmd_buffer_flush_pipeline_state(struct anv_cmd_buffer *cmd_buffer,
500                                     struct anv_graphics_pipeline *old_pipeline,
501                                     struct anv_graphics_pipeline *new_pipeline)
502 {
503    struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
504    struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
505 
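   /* The helpers below compare the packed HW state DWORDs between the old and
    * new pipeline and only set the corresponding dirty bit when they differ
    * (or when there is no old pipeline to compare against).
    */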
506 #define diff_fix_state(bit, name)                                       \
507    do {                                                                 \
508       /* Fixed states should always have matching sizes */              \
509       assert(old_pipeline == NULL ||                                    \
510              old_pipeline->name.len == new_pipeline->name.len);         \
511       /* Don't bother memcmp if the state is already dirty */           \
512       if (!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_##bit) &&         \
513           (old_pipeline == NULL ||                                      \
514            memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
515                   &new_pipeline->batch_data[new_pipeline->name.offset], \
516                   4 * new_pipeline->name.len) != 0))                    \
517          BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit);              \
518    } while (0)
519 #define diff_var_state(bit, name)                                       \
520    do {                                                                 \
521       /* Don't bother memcmp if the state is already dirty */           \
522       /* Also if the new state is empty, avoid marking dirty */         \
523       if (!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_##bit) &&         \
524           new_pipeline->name.len != 0 &&                                \
525           (old_pipeline == NULL ||                                      \
526            old_pipeline->name.len != new_pipeline->name.len ||          \
527            memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
528                   &new_pipeline->batch_data[new_pipeline->name.offset], \
529                   4 * new_pipeline->name.len) != 0))                    \
530          BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit);              \
531    } while (0)
532 #define assert_identical(bit, name)                                     \
533    do {                                                                 \
534       /* Fixed states should always have matching sizes */              \
535       assert(old_pipeline == NULL ||                                    \
536              old_pipeline->name.len == new_pipeline->name.len);         \
537       assert(old_pipeline == NULL ||                                    \
538              memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
539                     &new_pipeline->batch_data[new_pipeline->name.offset], \
540                     4 * new_pipeline->name.len) == 0);                  \
541    } while (0)
542 #define assert_empty(name) assert(new_pipeline->name.len == 0)
543 
544    /* Compare all states, including partially packed ones; the dynamic part is
545     * left at 0 but the static part could still change.
546     *
547     * We avoid comparing protected packets as all the fields but the scratch
548     * surface are identical. We just need to select the right one at emission.
549     */
550    diff_fix_state(URB,                      final.urb);
551    diff_fix_state(VF_SGVS,                  final.vf_sgvs);
552    if (cmd_buffer->device->info->ver >= 11)
553       diff_fix_state(VF_SGVS_2,             final.vf_sgvs_2);
554    if (cmd_buffer->device->info->ver >= 12)
555       diff_fix_state(PRIMITIVE_REPLICATION, final.primitive_replication);
556    diff_fix_state(SBE,                      final.sbe);
557    diff_fix_state(SBE_SWIZ,                 final.sbe_swiz);
558    diff_fix_state(VS,                       final.vs);
559    diff_fix_state(HS,                       final.hs);
560    diff_fix_state(DS,                       final.ds);
561 
562    diff_fix_state(CLIP,                     partial.clip);
563    diff_fix_state(SF,                       partial.sf);
564    diff_fix_state(WM,                       partial.wm);
565    diff_fix_state(STREAMOUT,                partial.so);
566    diff_fix_state(GS,                       partial.gs);
567    diff_fix_state(TE,                       partial.te);
568    diff_fix_state(VFG,                      partial.vfg);
569    diff_fix_state(PS,                       partial.ps);
570    diff_fix_state(PS_EXTRA,                 partial.ps_extra);
571 
572    if (cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader) {
573       diff_fix_state(TASK_CONTROL,          final.task_control);
574       diff_fix_state(TASK_SHADER,           final.task_shader);
575       diff_fix_state(TASK_REDISTRIB,        final.task_redistrib);
576       diff_fix_state(MESH_CONTROL,          final.mesh_control);
577       diff_fix_state(MESH_SHADER,           final.mesh_shader);
578       diff_fix_state(MESH_DISTRIB,          final.mesh_distrib);
579       diff_fix_state(CLIP_MESH,             final.clip_mesh);
580       diff_fix_state(SBE_MESH,              final.sbe_mesh);
581    } else {
582       assert_empty(final.task_control);
583       assert_empty(final.task_shader);
584       assert_empty(final.task_redistrib);
585       assert_empty(final.mesh_control);
586       assert_empty(final.mesh_shader);
587       assert_empty(final.mesh_distrib);
588       assert_empty(final.clip_mesh);
589       assert_empty(final.sbe_mesh);
590    }
591 
592    /* States that can vary in length */
593    diff_var_state(VF_SGVS_INSTANCING,       final.vf_sgvs_instancing);
594    diff_var_state(SO_DECL_LIST,             final.so_decl_list);
595 
596 #undef diff_fix_state
597 #undef diff_var_state
598 #undef assert_identical
599 #undef assert_empty
600 
601    /* We're not diffing the following:
602     *    - anv_graphics_pipeline::vertex_input_data
603     *    - anv_graphics_pipeline::final::vf_instancing
604     *
605     * since they are tracked by the runtime.
606     */
607 }
608 
609 void anv_CmdBindPipeline(
610     VkCommandBuffer                             commandBuffer,
611     VkPipelineBindPoint                         pipelineBindPoint,
612     VkPipeline                                  _pipeline)
613 {
614    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
615    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
616    struct anv_cmd_pipeline_state *state;
617    VkShaderStageFlags stages = 0;
618 
619    switch (pipelineBindPoint) {
620    case VK_PIPELINE_BIND_POINT_COMPUTE: {
621       if (cmd_buffer->state.compute.base.pipeline == pipeline)
622          return;
623 
624       cmd_buffer->state.compute.base.pipeline = pipeline;
625       cmd_buffer->state.compute.pipeline_dirty = true;
626 
627       struct anv_compute_pipeline *compute_pipeline =
628          anv_pipeline_to_compute(pipeline);
629       set_dirty_for_bind_map(cmd_buffer, MESA_SHADER_COMPUTE,
630                              &compute_pipeline->cs->bind_map);
631 
632       state = &cmd_buffer->state.compute.base;
633       stages = VK_SHADER_STAGE_COMPUTE_BIT;
634       break;
635    }
636 
637    case VK_PIPELINE_BIND_POINT_GRAPHICS: {
638       struct anv_graphics_pipeline *new_pipeline =
639          anv_pipeline_to_graphics(pipeline);
640 
641       /* Apply the non-dynamic state from the pipeline */
642       vk_cmd_set_dynamic_graphics_state(&cmd_buffer->vk,
643                                         &new_pipeline->dynamic_state);
644 
645       if (cmd_buffer->state.gfx.base.pipeline == pipeline)
646          return;
647 
648       struct anv_graphics_pipeline *old_pipeline =
649          cmd_buffer->state.gfx.base.pipeline == NULL ? NULL :
650          anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
651 
652       cmd_buffer->state.gfx.base.pipeline = pipeline;
653       cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE;
654 
655       anv_foreach_stage(stage, new_pipeline->base.base.active_stages) {
656          set_dirty_for_bind_map(cmd_buffer, stage,
657                                 &new_pipeline->base.shaders[stage]->bind_map);
658       }
659 
660       state = &cmd_buffer->state.gfx.base;
661       stages = new_pipeline->base.base.active_stages;
662 
663 
664       /* When the pipeline is using independent states and dynamic buffers,
665        * this will trigger an update of anv_push_constants::dynamic_base_index
666        * & anv_push_constants::dynamic_offsets.
667        */
668       struct anv_push_constants *push =
669          &cmd_buffer->state.gfx.base.push_constants;
670       struct anv_pipeline_sets_layout *layout = &new_pipeline->base.base.layout;
671       if (layout->independent_sets && layout->num_dynamic_buffers > 0) {
672          bool modified = false;
673          for (uint32_t s = 0; s < layout->num_sets; s++) {
674             if (layout->set[s].layout == NULL)
675                continue;
676 
677             assert(layout->set[s].dynamic_offset_start < MAX_DYNAMIC_BUFFERS);
678             if (layout->set[s].layout->dynamic_offset_count > 0 &&
679                 (push->desc_surface_offsets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) !=
680                 layout->set[s].dynamic_offset_start) {
681                push->desc_surface_offsets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
682                push->desc_surface_offsets[s] |= (layout->set[s].dynamic_offset_start &
683                                                  ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
684                modified = true;
685             }
686          }
687          if (modified) {
688             cmd_buffer->state.push_constants_dirty |= stages;
689             state->push_constants_data_dirty = true;
690          }
691       }
692 
693       anv_cmd_buffer_flush_pipeline_state(cmd_buffer, old_pipeline, new_pipeline);
694       break;
695    }
696 
697    case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: {
698       if (cmd_buffer->state.rt.base.pipeline == pipeline)
699          return;
700 
701       cmd_buffer->state.rt.base.pipeline = pipeline;
702       cmd_buffer->state.rt.pipeline_dirty = true;
703 
704       struct anv_ray_tracing_pipeline *rt_pipeline =
705          anv_pipeline_to_ray_tracing(pipeline);
706       if (rt_pipeline->stack_size > 0) {
707          anv_CmdSetRayTracingPipelineStackSizeKHR(commandBuffer,
708                                                   rt_pipeline->stack_size);
709       }
710 
711       state = &cmd_buffer->state.rt.base;
712       break;
713    }
714 
715    default:
716       unreachable("invalid bind point");
717       break;
718    }
719 
720    if (pipeline->ray_queries > 0)
721       anv_cmd_buffer_set_ray_query_buffer(cmd_buffer, state, pipeline, stages);
722 }
723 
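/* Map a pipeline bind point to its per-command-buffer pipeline state and
 * restrict the set layout's shader stages to the stages valid for that bind
 * point.
 */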
724 static struct anv_cmd_pipeline_state *
725 anv_cmd_buffer_get_pipeline_layout_state(struct anv_cmd_buffer *cmd_buffer,
726                                          VkPipelineBindPoint bind_point,
727                                          const struct anv_descriptor_set_layout *set_layout,
728                                          VkShaderStageFlags *out_stages)
729 {
730    *out_stages = set_layout->shader_stages;
731 
732    switch (bind_point) {
733    case VK_PIPELINE_BIND_POINT_GRAPHICS:
734       *out_stages &= VK_SHADER_STAGE_ALL_GRAPHICS |
735          (cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader ?
736           (VK_SHADER_STAGE_TASK_BIT_EXT |
737            VK_SHADER_STAGE_MESH_BIT_EXT) : 0);
738       return &cmd_buffer->state.gfx.base;
739 
740    case VK_PIPELINE_BIND_POINT_COMPUTE:
741       *out_stages &= VK_SHADER_STAGE_COMPUTE_BIT;
742       return &cmd_buffer->state.compute.base;
743 
744    case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
745       *out_stages &= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
746          VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
747          VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
748          VK_SHADER_STAGE_MISS_BIT_KHR |
749          VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
750          VK_SHADER_STAGE_CALLABLE_BIT_KHR;
751       return &cmd_buffer->state.rt.base;
752 
753    default:
754       unreachable("invalid bind point");
755    }
756 }
757 
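/* Record a pending switch between the legacy and descriptor-buffer binding
 * models so that a later flush reprograms STATE_BASE_ADDRESS accordingly.
 */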
758 static void
759 anv_cmd_buffer_maybe_dirty_descriptor_mode(struct anv_cmd_buffer *cmd_buffer,
760                                            enum anv_cmd_descriptor_buffer_mode new_mode)
761 {
762    if (cmd_buffer->state.current_db_mode == new_mode)
763       return;
764 
765    /* Ensure we program the STATE_BASE_ADDRESS properly at least once */
766    cmd_buffer->state.descriptor_buffers.dirty = true;
767    cmd_buffer->state.pending_db_mode = new_mode;
768 }
769 
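/* Bind a single descriptor set at set_index: pick the descriptor-buffer or
 * legacy path, update the push constant offsets pointing at the set's
 * descriptor data, consume the matching dynamic offsets, and flag the
 * affected stages dirty.
 */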
770 static void
771 anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
772                                    VkPipelineBindPoint bind_point,
773                                    struct anv_pipeline_sets_layout *layout,
774                                    uint32_t set_index,
775                                    struct anv_descriptor_set *set,
776                                    uint32_t *dynamic_offset_count,
777                                    const uint32_t **dynamic_offsets)
778 {
779    /* Either we have no pool because it's a push descriptor or the pool is not
780     * host-only:
781     *
782     * VUID-vkCmdBindDescriptorSets-pDescriptorSets-04616:
783     *
784     *    "Each element of pDescriptorSets must not have been allocated from a
785     *     VkDescriptorPool with the
786     *     VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT flag set"
787     */
788    assert(!set->pool || !set->pool->host_only);
789 
790    struct anv_descriptor_set_layout *set_layout =
791       layout ? layout->set[set_index].layout: set->layout;
792 
793    anv_cmd_buffer_maybe_dirty_descriptor_mode(
794       cmd_buffer,
795       (set->layout->flags &
796        VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT) != 0 ?
797       ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER :
798       ANV_CMD_DESCRIPTOR_BUFFER_MODE_LEGACY);
799 
800    VkShaderStageFlags stages;
801    struct anv_cmd_pipeline_state *pipe_state =
802       anv_cmd_buffer_get_pipeline_layout_state(cmd_buffer, bind_point,
803                                                set_layout, &stages);
804 
805    VkShaderStageFlags dirty_stages = 0;
806    /* If it's a push descriptor set, we have to flag things as dirty
807     * regardless of whether or not the CPU-side data structure changed, as we
808     * may have edited it in place.
809     */
810    if (pipe_state->descriptors[set_index] != set ||
811        anv_descriptor_set_is_push(set)) {
812       pipe_state->descriptors[set_index] = set;
813 
814       if (set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT) {
815          assert(set->is_push);
816 
817          pipe_state->descriptor_buffers[set_index].buffer_index = -1;
818          pipe_state->descriptor_buffers[set_index].buffer_offset = set->desc_offset;
819          pipe_state->descriptor_buffers[set_index].bound = true;
820          cmd_buffer->state.descriptors_dirty |= stages;
821          cmd_buffer->state.descriptor_buffers.offsets_dirty |= stages;
822       } else {
823          /* When using indirect descriptors, stages that have access to the HW
824           * binding tables never need to access the
825           * anv_push_constants::desc_offsets fields, because any data they
826           * need from the descriptor buffer is accessible through a binding
827           * table entry. For stages that are "bindless" (Mesh/Task/RT), we
828           * need to provide anv_push_constants::desc_offsets matching the
829           * bound descriptor so that shaders can access the descriptor buffer
830           * through A64 messages.
831           *
832           * With direct descriptors, the shaders use the
833           * anv_push_constants::desc_offsets to build bindless offsets, so
834           * we always need to update the push constant data.
835           */
836          bool update_desc_sets =
837             !cmd_buffer->device->physical->indirect_descriptors ||
838             (stages & (VK_SHADER_STAGE_TASK_BIT_EXT |
839                        VK_SHADER_STAGE_MESH_BIT_EXT |
840                        VK_SHADER_STAGE_RAYGEN_BIT_KHR |
841                        VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
842                        VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
843                        VK_SHADER_STAGE_MISS_BIT_KHR |
844                        VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
845                        VK_SHADER_STAGE_CALLABLE_BIT_KHR));
846 
847          if (update_desc_sets) {
848             struct anv_push_constants *push = &pipe_state->push_constants;
849             uint64_t offset =
850                anv_address_physical(set->desc_surface_addr) -
851                cmd_buffer->device->physical->va.internal_surface_state_pool.addr;
852             assert((offset & ~ANV_DESCRIPTOR_SET_OFFSET_MASK) == 0);
853             push->desc_surface_offsets[set_index] &= ~ANV_DESCRIPTOR_SET_OFFSET_MASK;
854             push->desc_surface_offsets[set_index] |= offset;
855             push->desc_sampler_offsets[set_index] =
856                anv_address_physical(set->desc_sampler_addr) -
857                cmd_buffer->device->physical->va.dynamic_state_pool.addr;
858 
859             anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
860                                   set->desc_surface_addr.bo);
861             anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
862                                   set->desc_sampler_addr.bo);
863          }
864       }
865 
866       dirty_stages |= stages;
867    }
868 
869    if (dynamic_offsets) {
870       if (set_layout->dynamic_offset_count > 0) {
871          struct anv_push_constants *push = &pipe_state->push_constants;
872          assert(layout != NULL);
873          uint32_t dynamic_offset_start =
874             layout->set[set_index].dynamic_offset_start;
875          uint32_t *push_offsets =
876             &push->dynamic_offsets[dynamic_offset_start];
877 
878          memcpy(pipe_state->dynamic_offsets[set_index].offsets,
879                 *dynamic_offsets,
880                 sizeof(uint32_t) * MIN2(*dynamic_offset_count,
881                                         set_layout->dynamic_offset_count));
882 
883          /* Assert that everything is in range */
884          assert(set_layout->dynamic_offset_count <= *dynamic_offset_count);
885          assert(dynamic_offset_start + set_layout->dynamic_offset_count <=
886                 ARRAY_SIZE(push->dynamic_offsets));
887 
888          for (uint32_t i = 0; i < set_layout->dynamic_offset_count; i++) {
889             if (push_offsets[i] != (*dynamic_offsets)[i]) {
890                pipe_state->dynamic_offsets[set_index].offsets[i] =
891                   push_offsets[i] = (*dynamic_offsets)[i];
892                /* dynamic_offset_stages[] elements could contain blanket
893                 * values like VK_SHADER_STAGE_ALL, so limit this to the
894                 * binding point's bits.
895                 */
896                dirty_stages |= set_layout->dynamic_offset_stages[i] & stages;
897             }
898          }
899 
900          *dynamic_offsets += set_layout->dynamic_offset_count;
901          *dynamic_offset_count -= set_layout->dynamic_offset_count;
902       }
903    }
904 
905    if (set->is_push)
906       cmd_buffer->state.push_descriptors_dirty |= dirty_stages;
907    else
908       cmd_buffer->state.descriptors_dirty |= dirty_stages;
909    cmd_buffer->state.push_constants_dirty |= dirty_stages;
910    pipe_state->push_constants_data_dirty = true;
911 }
912 
913 #define ANV_GRAPHICS_STAGE_BITS \
914    (VK_SHADER_STAGE_ALL_GRAPHICS | \
915     VK_SHADER_STAGE_MESH_BIT_EXT | \
916     VK_SHADER_STAGE_TASK_BIT_EXT)
917 
918 #define ANV_RT_STAGE_BITS \
919    (VK_SHADER_STAGE_RAYGEN_BIT_KHR | \
920     VK_SHADER_STAGE_ANY_HIT_BIT_KHR | \
921     VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | \
922     VK_SHADER_STAGE_MISS_BIT_KHR | \
923     VK_SHADER_STAGE_INTERSECTION_BIT_KHR | \
924     VK_SHADER_STAGE_CALLABLE_BIT_KHR)
925 
926 void anv_CmdBindDescriptorSets2KHR(
927     VkCommandBuffer                             commandBuffer,
928     const VkBindDescriptorSetsInfoKHR*          pInfo)
929 {
930    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
931    ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pInfo->layout);
932    struct anv_pipeline_sets_layout *layout = &pipeline_layout->sets_layout;
933 
934    assert(pInfo->firstSet + pInfo->descriptorSetCount <= MAX_SETS);
935 
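   /* Bind the sets once per bind point selected by stageFlags. Each pass uses
    * its own copy of the dynamic offset cursor because
    * anv_cmd_buffer_bind_descriptor_set() consumes offsets as it goes.
    */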
936    if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
937       uint32_t dynamicOffsetCount = pInfo->dynamicOffsetCount;
938       const uint32_t *pDynamicOffsets = pInfo->pDynamicOffsets;
939 
940       for (uint32_t i = 0; i < pInfo->descriptorSetCount; i++) {
941          ANV_FROM_HANDLE(anv_descriptor_set, set, pInfo->pDescriptorSets[i]);
942          if (set == NULL)
943             continue;
944          anv_cmd_buffer_bind_descriptor_set(cmd_buffer,
945                                             VK_PIPELINE_BIND_POINT_COMPUTE,
946                                             layout, pInfo->firstSet + i, set,
947                                             &dynamicOffsetCount,
948                                             &pDynamicOffsets);
949       }
950    }
951    if (pInfo->stageFlags & ANV_GRAPHICS_STAGE_BITS) {
952       uint32_t dynamicOffsetCount = pInfo->dynamicOffsetCount;
953       const uint32_t *pDynamicOffsets = pInfo->pDynamicOffsets;
954 
955       for (uint32_t i = 0; i < pInfo->descriptorSetCount; i++) {
956          ANV_FROM_HANDLE(anv_descriptor_set, set, pInfo->pDescriptorSets[i]);
957          if (set == NULL)
958             continue;
959          anv_cmd_buffer_bind_descriptor_set(cmd_buffer,
960                                             VK_PIPELINE_BIND_POINT_GRAPHICS,
961                                             layout, pInfo->firstSet + i, set,
962                                             &dynamicOffsetCount,
963                                             &pDynamicOffsets);
964       }
965    }
966    if (pInfo->stageFlags & ANV_RT_STAGE_BITS) {
967       uint32_t dynamicOffsetCount = pInfo->dynamicOffsetCount;
968       const uint32_t *pDynamicOffsets = pInfo->pDynamicOffsets;
969 
970       for (uint32_t i = 0; i < pInfo->descriptorSetCount; i++) {
971          ANV_FROM_HANDLE(anv_descriptor_set, set, pInfo->pDescriptorSets[i]);
972          if (set == NULL)
973             continue;
974          anv_cmd_buffer_bind_descriptor_set(cmd_buffer,
975                                             VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR,
976                                             layout, pInfo->firstSet + i, set,
977                                             &dynamicOffsetCount,
978                                             &pDynamicOffsets);
979       }
980    }
981 }
982 
983 void anv_CmdBindDescriptorBuffersEXT(
984     VkCommandBuffer                             commandBuffer,
985     uint32_t                                    bufferCount,
986     const VkDescriptorBufferBindingInfoEXT*     pBindingInfos)
987 {
988    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
989    struct anv_cmd_state *state = &cmd_buffer->state;
990 
991    for (uint32_t i = 0; i < bufferCount; i++) {
992       assert(pBindingInfos[i].address >= cmd_buffer->device->physical->va.dynamic_visible_pool.addr &&
993              pBindingInfos[i].address < (cmd_buffer->device->physical->va.dynamic_visible_pool.addr +
994                                          cmd_buffer->device->physical->va.dynamic_visible_pool.size));
995 
996       if (state->descriptor_buffers.address[i] != pBindingInfos[i].address) {
997          state->descriptor_buffers.address[i] = pBindingInfos[i].address;
998          if (pBindingInfos[i].usage & VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT)
999             state->descriptor_buffers.surfaces_address = pBindingInfos[i].address;
1000          if (pBindingInfos[i].usage & VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT)
1001             state->descriptor_buffers.samplers_address = pBindingInfos[i].address;
1002          state->descriptor_buffers.dirty = true;
1003          state->descriptor_buffers.offsets_dirty = ~0;
1004       }
1005    }
1006 
1007    anv_cmd_buffer_maybe_dirty_descriptor_mode(cmd_buffer,
1008                                               ANV_CMD_DESCRIPTOR_BUFFER_MODE_BUFFER);
1009 }
1010 
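/* Record the descriptor buffer index/offset for each set and flag the
 * affected stages dirty when a binding actually changes.
 */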
1011 static void
1012 anv_cmd_buffer_set_descriptor_buffer_offsets(struct anv_cmd_buffer *cmd_buffer,
1013                                              VkPipelineBindPoint bind_point,
1014                                              struct anv_pipeline_layout *layout,
1015                                              uint32_t first_set,
1016                                              uint32_t set_count,
1017                                              const VkDeviceSize *buffer_offsets,
1018                                              const uint32_t *buffer_indices)
1019 {
1020    for (uint32_t i = 0; i < set_count; i++) {
1021       const uint32_t set_index = first_set + i;
1022 
1023       const struct anv_descriptor_set_layout *set_layout =
1024          layout->sets_layout.set[set_index].layout;
1025       VkShaderStageFlags stages;
1026       struct anv_cmd_pipeline_state *pipe_state =
1027          anv_cmd_buffer_get_pipeline_layout_state(cmd_buffer, bind_point,
1028                                                   set_layout, &stages);
1029 
1030       if (buffer_offsets[i] != pipe_state->descriptor_buffers[set_index].buffer_offset ||
1031           buffer_indices[i] != pipe_state->descriptor_buffers[set_index].buffer_index ||
1032           !pipe_state->descriptor_buffers[set_index].bound) {
1033          pipe_state->descriptor_buffers[set_index].buffer_index = buffer_indices[i];
1034          pipe_state->descriptor_buffers[set_index].buffer_offset = buffer_offsets[i];
1035          cmd_buffer->state.descriptors_dirty |= stages;
1036          cmd_buffer->state.descriptor_buffers.offsets_dirty |= stages;
1037       }
1038       pipe_state->descriptor_buffers[set_index].bound = true;
1039    }
1040 }
1041 
1042 void anv_CmdSetDescriptorBufferOffsets2EXT(
1043     VkCommandBuffer                             commandBuffer,
1044     const VkSetDescriptorBufferOffsetsInfoEXT*  pSetDescriptorBufferOffsetsInfo)
1045 {
1046    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1047    ANV_FROM_HANDLE(anv_pipeline_layout, layout, pSetDescriptorBufferOffsetsInfo->layout);
1048 
1049    if (pSetDescriptorBufferOffsetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
1050       anv_cmd_buffer_set_descriptor_buffer_offsets(cmd_buffer,
1051                                                    VK_PIPELINE_BIND_POINT_COMPUTE,
1052                                                    layout,
1053                                                    pSetDescriptorBufferOffsetsInfo->firstSet,
1054                                                    pSetDescriptorBufferOffsetsInfo->setCount,
1055                                                    pSetDescriptorBufferOffsetsInfo->pOffsets,
1056                                                    pSetDescriptorBufferOffsetsInfo->pBufferIndices);
1057    }
1058    if (pSetDescriptorBufferOffsetsInfo->stageFlags & ANV_GRAPHICS_STAGE_BITS) {
1059       anv_cmd_buffer_set_descriptor_buffer_offsets(cmd_buffer,
1060                                                    VK_PIPELINE_BIND_POINT_GRAPHICS,
1061                                                    layout,
1062                                                    pSetDescriptorBufferOffsetsInfo->firstSet,
1063                                                    pSetDescriptorBufferOffsetsInfo->setCount,
1064                                                    pSetDescriptorBufferOffsetsInfo->pOffsets,
1065                                                    pSetDescriptorBufferOffsetsInfo->pBufferIndices);
1066    }
1067    if (pSetDescriptorBufferOffsetsInfo->stageFlags & ANV_RT_STAGE_BITS) {
1068       anv_cmd_buffer_set_descriptor_buffer_offsets(cmd_buffer,
1069                                                    VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR,
1070                                                    layout,
1071                                                    pSetDescriptorBufferOffsetsInfo->firstSet,
1072                                                    pSetDescriptorBufferOffsetsInfo->setCount,
1073                                                    pSetDescriptorBufferOffsetsInfo->pOffsets,
1074                                                    pSetDescriptorBufferOffsetsInfo->pBufferIndices);
1075    }
1076 }
1077 
1078 void anv_CmdBindDescriptorBufferEmbeddedSamplers2EXT(
1079     VkCommandBuffer                             commandBuffer,
1080     const VkBindDescriptorBufferEmbeddedSamplersInfoEXT* pBindDescriptorBufferEmbeddedSamplersInfo)
1081 {
1082    /* no-op */
1083 }
1084 
1085 void anv_CmdBindVertexBuffers2(
1086    VkCommandBuffer                              commandBuffer,
1087    uint32_t                                     firstBinding,
1088    uint32_t                                     bindingCount,
1089    const VkBuffer*                              pBuffers,
1090    const VkDeviceSize*                          pOffsets,
1091    const VkDeviceSize*                          pSizes,
1092    const VkDeviceSize*                          pStrides)
1093 {
1094    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1095    struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings;
1096 
1097    /* We have to defer setting up the vertex buffers since we need the buffer
1098     * stride from the pipeline. */
1099 
1100    assert(firstBinding + bindingCount <= MAX_VBS);
1101    for (uint32_t i = 0; i < bindingCount; i++) {
1102       ANV_FROM_HANDLE(anv_buffer, buffer, pBuffers[i]);
1103 
1104       if (buffer == NULL) {
1105          vb[firstBinding + i] = (struct anv_vertex_binding) {
1106             .buffer = NULL,
1107          };
1108       } else {
1109          vb[firstBinding + i] = (struct anv_vertex_binding) {
1110             .buffer = buffer,
1111             .offset = pOffsets[i],
1112             .size = vk_buffer_range(&buffer->vk, pOffsets[i],
1113                                     pSizes ? pSizes[i] : VK_WHOLE_SIZE),
1114          };
1115       }
1116       cmd_buffer->state.gfx.vb_dirty |= 1 << (firstBinding + i);
1117    }
1118 
1119    if (pStrides != NULL) {
1120       vk_cmd_set_vertex_binding_strides(&cmd_buffer->vk, firstBinding,
1121                                         bindingCount, pStrides);
1122    }
1123 }
1124 
1125 void anv_CmdBindTransformFeedbackBuffersEXT(
1126     VkCommandBuffer                             commandBuffer,
1127     uint32_t                                    firstBinding,
1128     uint32_t                                    bindingCount,
1129     const VkBuffer*                             pBuffers,
1130     const VkDeviceSize*                         pOffsets,
1131     const VkDeviceSize*                         pSizes)
1132 {
1133    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1134    struct anv_xfb_binding *xfb = cmd_buffer->state.xfb_bindings;
1135 
1136    /* We have to defer setting up the transform feedback buffers since we need
1137     * the buffer stride from the pipeline. */
1138 
1139    assert(firstBinding + bindingCount <= MAX_XFB_BUFFERS);
1140    for (uint32_t i = 0; i < bindingCount; i++) {
1141       if (pBuffers[i] == VK_NULL_HANDLE) {
1142          xfb[firstBinding + i].buffer = NULL;
1143       } else {
1144          ANV_FROM_HANDLE(anv_buffer, buffer, pBuffers[i]);
1145          xfb[firstBinding + i].buffer = buffer;
1146          xfb[firstBinding + i].offset = pOffsets[i];
1147          xfb[firstBinding + i].size =
1148             vk_buffer_range(&buffer->vk, pOffsets[i],
1149                             pSizes ? pSizes[i] : VK_WHOLE_SIZE);
1150       }
1151    }
1152 }
1153 
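/* Pick the surface format used for buffer descriptors: UBOs use an RGBA32
 * float format when indirect UBO loads go through the sampler, RAW otherwise;
 * SSBOs are always RAW.
 */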
1154 enum isl_format
1155 anv_isl_format_for_descriptor_type(const struct anv_device *device,
1156                                    VkDescriptorType type)
1157 {
1158    switch (type) {
1159    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1160    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1161       return device->physical->compiler->indirect_ubos_use_sampler ?
1162              ISL_FORMAT_R32G32B32A32_FLOAT : ISL_FORMAT_RAW;
1163 
1164    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1165    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1166       return ISL_FORMAT_RAW;
1167 
1168    default:
1169       unreachable("Invalid descriptor type");
1170    }
1171 }
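/* Note on the format selection above: when the backend compiler pulls
 * indirect UBO loads through the sampler (indirect_ubos_use_sampler), the
 * buffer needs a typed RGBA32_FLOAT surface; otherwise, and for SSBOs, a RAW
 * buffer surface is used so untyped byte-addressed dataport access works.
 */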
struct anv_state
anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
                            const void *data, uint32_t size, uint32_t alignment)
{
   struct anv_state state;

   state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);
   memcpy(state.map, data, size);

   VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size));

   return state;
}
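/* A minimal sketch of how a caller could use the helper above (hypothetical
 * values, not taken from a real emit path): copy a block of constants into
 * the dynamic state pool and get back an anv_state whose offset can be
 * packed into a state pointer in the batch.
 *
 *    float blend_constants[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
 *    struct anv_state cc =
 *       anv_cmd_buffer_emit_dynamic(cmd_buffer, blend_constants,
 *                                   sizeof(blend_constants), 64);
 *    // cc.offset is relative to the dynamic state base address
 */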
struct anv_state
anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
                             uint32_t *a, uint32_t *b,
                             uint32_t dwords, uint32_t alignment)
{
   struct anv_state state;
   uint32_t *p;

   state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                              dwords * 4, alignment);
   p = state.map;
   for (uint32_t i = 0; i < dwords; i++) {
      assert((a[i] & b[i]) == 0);
      p[i] = a[i] | b[i];
   }

   VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));

   return state;
}
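/* The merge above relies on the two inputs packing disjoint bitfields of the
 * same hardware state: one half is typically baked into the pipeline at
 * create time and the other is packed from dynamic state at draw time, so
 * OR-ing the dwords reassembles the full packet.  A sketch with made-up
 * field splits, purely for illustration:
 *
 *    uint32_t from_pipeline[2] = { 0x0000f00d, 0x00000000 };
 *    uint32_t from_dynamic[2]  = { 0xbeef0000, 0x00000042 };
 *    struct anv_state merged =
 *       anv_cmd_buffer_merge_dynamic(cmd_buffer, from_pipeline,
 *                                    from_dynamic, 2, 32);
 *
 * The assert((a[i] & b[i]) == 0) catches any field accidentally packed on
 * both sides.
 */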
struct anv_state
anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer)
{
   const struct anv_push_constants *data =
      &cmd_buffer->state.gfx.base.push_constants;

   struct anv_state state =
      anv_cmd_buffer_alloc_temporary_state(cmd_buffer,
                                           sizeof(struct anv_push_constants),
                                           32 /* bottom 5 bits MBZ */);
   if (state.alloc_size == 0)
      return state;

   memcpy(state.map, data->client_data,
          cmd_buffer->state.gfx.base.push_constants_client_size);
   memcpy(state.map + sizeof(data->client_data),
          &data->desc_surface_offsets,
          sizeof(struct anv_push_constants) - sizeof(data->client_data));

   return state;
}
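/* Layout note for the two memcpy() calls above: anv_push_constants begins
 * with the application-visible client_data bytes and is followed by
 * driver-internal fields (descriptor surface offsets, etc.).  Only the bytes
 * the client actually pushed (push_constants_client_size) are copied for the
 * first part, while the driver-internal tail is always copied in full so the
 * shader-visible buffer stays complete.
 */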
struct anv_state
anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
{
   const struct intel_device_info *devinfo = cmd_buffer->device->info;
   struct anv_cmd_pipeline_state *pipe_state = &cmd_buffer->state.compute.base;
   struct anv_push_constants *data = &pipe_state->push_constants;
   struct anv_compute_pipeline *pipeline =
      anv_pipeline_to_compute(cmd_buffer->state.compute.base.pipeline);
   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
   const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0];

   const struct intel_cs_dispatch_info dispatch =
      brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
   const unsigned total_push_constants_size =
      brw_cs_push_const_total_size(cs_prog_data, dispatch.threads);
   if (total_push_constants_size == 0)
      return (struct anv_state) { .offset = 0 };

   const unsigned push_constant_alignment = 64;
   const unsigned aligned_total_push_constants_size =
      ALIGN(total_push_constants_size, push_constant_alignment);
   struct anv_state state;
   if (devinfo->verx10 >= 125) {
      state = anv_state_stream_alloc(&cmd_buffer->general_state_stream,
                                     aligned_total_push_constants_size,
                                     push_constant_alignment);
   } else {
      state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                                 aligned_total_push_constants_size,
                                                 push_constant_alignment);
   }
   if (state.map == NULL)
      return state;

   void *dst = state.map;
   const void *src = (char *)data + (range->start * 32);

   if (cs_prog_data->push.cross_thread.size > 0) {
      memcpy(dst, src, cs_prog_data->push.cross_thread.size);
      dst += cs_prog_data->push.cross_thread.size;
      src += cs_prog_data->push.cross_thread.size;
   }

   if (cs_prog_data->push.per_thread.size > 0) {
      for (unsigned t = 0; t < dispatch.threads; t++) {
         memcpy(dst, src, cs_prog_data->push.per_thread.size);

         uint32_t *subgroup_id = dst +
            offsetof(struct anv_push_constants, cs.subgroup_id) -
            (range->start * 32 + cs_prog_data->push.cross_thread.size);
         *subgroup_id = t;

         dst += cs_prog_data->push.per_thread.size;
      }
   }

   return state;
}
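/* Compute push constant layout produced above, roughly (the exact split
 * comes from brw_cs_prog_data and depends on the compiled shader):
 *
 *    +--------------------------+  <- state.map
 *    | cross-thread constants   |  shared by every HW thread
 *    +--------------------------+
 *    | per-thread copy, t = 0   |  cs.subgroup_id patched to 0
 *    | per-thread copy, t = 1   |  cs.subgroup_id patched to 1
 *    | ...                      |
 *    +--------------------------+
 *
 * Each per-thread block is a copy of the same source range with only the
 * subgroup ID rewritten, so each hardware thread can tell which subgroup of
 * the workgroup it is running without extra dispatch-time setup.
 */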
void anv_CmdPushConstants2KHR(
    VkCommandBuffer                             commandBuffer,
    const VkPushConstantsInfoKHR*               pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   if (pInfo->stageFlags & ANV_GRAPHICS_STAGE_BITS) {
      struct anv_cmd_pipeline_state *pipe_state =
         &cmd_buffer->state.gfx.base;

      memcpy(pipe_state->push_constants.client_data + pInfo->offset,
             pInfo->pValues, pInfo->size);
      pipe_state->push_constants_data_dirty = true;
      pipe_state->push_constants_client_size = MAX2(
         pipe_state->push_constants_client_size, pInfo->offset + pInfo->size);
   }
   if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      struct anv_cmd_pipeline_state *pipe_state =
         &cmd_buffer->state.compute.base;

      memcpy(pipe_state->push_constants.client_data + pInfo->offset,
             pInfo->pValues, pInfo->size);
      pipe_state->push_constants_data_dirty = true;
      pipe_state->push_constants_client_size = MAX2(
         pipe_state->push_constants_client_size, pInfo->offset + pInfo->size);
   }
   if (pInfo->stageFlags & ANV_RT_STAGE_BITS) {
      struct anv_cmd_pipeline_state *pipe_state =
         &cmd_buffer->state.rt.base;

      memcpy(pipe_state->push_constants.client_data + pInfo->offset,
             pInfo->pValues, pInfo->size);
      pipe_state->push_constants_data_dirty = true;
      pipe_state->push_constants_client_size = MAX2(
         pipe_state->push_constants_client_size, pInfo->offset + pInfo->size);
   }

   cmd_buffer->state.push_constants_dirty |= pInfo->stageFlags;
}

static struct anv_cmd_pipeline_state *
anv_cmd_buffer_get_pipe_state(struct anv_cmd_buffer *cmd_buffer,
                              VkPipelineBindPoint bind_point)
{
   switch (bind_point) {
   case VK_PIPELINE_BIND_POINT_GRAPHICS:
      return &cmd_buffer->state.gfx.base;
   case VK_PIPELINE_BIND_POINT_COMPUTE:
      return &cmd_buffer->state.compute.base;
   case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
      return &cmd_buffer->state.rt.base;
   default:
      unreachable("invalid bind point");
   }
}
static void
anv_cmd_buffer_push_descriptor_sets(struct anv_cmd_buffer *cmd_buffer,
                                    VkPipelineBindPoint bind_point,
                                    const VkPushDescriptorSetInfoKHR *pInfo)
{
   ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pInfo->layout);
   struct anv_pipeline_sets_layout *layout = &pipeline_layout->sets_layout;

   assert(pInfo->set < MAX_SETS);

   struct anv_descriptor_set_layout *set_layout = layout->set[pInfo->set].layout;
   struct anv_push_descriptor_set *push_set =
      &anv_cmd_buffer_get_pipe_state(cmd_buffer,
                                     bind_point)->push_descriptor;
   if (!anv_push_descriptor_set_init(cmd_buffer, push_set, set_layout))
      return;

   anv_descriptor_set_write(cmd_buffer->device, &push_set->set,
                            pInfo->descriptorWriteCount,
                            pInfo->pDescriptorWrites);

   anv_cmd_buffer_bind_descriptor_set(cmd_buffer, bind_point,
                                      layout, pInfo->set, &push_set->set,
                                      NULL, NULL);
}
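/* Push descriptor flow, as implemented above: the per-bind-point
 * push_descriptor set is (re)initialized against the set layout, the
 * descriptor writes are applied directly to it, and then it is bound like a
 * regular descriptor set.  There is one such set per pipeline bind point, so
 * pushing for graphics does not disturb a previously pushed compute set.
 */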
void anv_CmdPushDescriptorSet2KHR(
    VkCommandBuffer                            commandBuffer,
    const VkPushDescriptorSetInfoKHR*          pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
      anv_cmd_buffer_push_descriptor_sets(cmd_buffer,
                                          VK_PIPELINE_BIND_POINT_COMPUTE,
                                          pInfo);
   if (pInfo->stageFlags & ANV_GRAPHICS_STAGE_BITS)
      anv_cmd_buffer_push_descriptor_sets(cmd_buffer,
                                          VK_PIPELINE_BIND_POINT_GRAPHICS,
                                          pInfo);
   if (pInfo->stageFlags & ANV_RT_STAGE_BITS)
      anv_cmd_buffer_push_descriptor_sets(cmd_buffer,
                                          VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR,
                                          pInfo);
}

void anv_CmdPushDescriptorSetWithTemplate2KHR(
    VkCommandBuffer                                commandBuffer,
    const VkPushDescriptorSetWithTemplateInfoKHR*  pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_descriptor_update_template, template,
                  pInfo->descriptorUpdateTemplate);
   ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pInfo->layout);
   struct anv_pipeline_sets_layout *layout = &pipeline_layout->sets_layout;

   assert(pInfo->set < MAX_PUSH_DESCRIPTORS);

   struct anv_descriptor_set_layout *set_layout = layout->set[pInfo->set].layout;
   UNUSED VkShaderStageFlags stages;
   struct anv_cmd_pipeline_state *pipe_state =
      anv_cmd_buffer_get_pipeline_layout_state(cmd_buffer, template->bind_point,
                                               set_layout, &stages);
   struct anv_push_descriptor_set *push_set = &pipe_state->push_descriptor;
   if (!anv_push_descriptor_set_init(cmd_buffer, push_set, set_layout))
      return;

   anv_descriptor_set_write_template(cmd_buffer->device, &push_set->set,
                                     template,
                                     pInfo->pData);

   anv_cmd_buffer_bind_descriptor_set(cmd_buffer, template->bind_point,
                                      layout, pInfo->set, &push_set->set,
                                      NULL, NULL);
}
void anv_CmdSetRayTracingPipelineStackSizeKHR(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    pipelineStackSize)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;
   struct anv_device *device = cmd_buffer->device;

   if (anv_batch_has_error(&cmd_buffer->batch))
      return;

   uint32_t stack_ids_per_dss = 2048; /* TODO */

   unsigned stack_size_log2 = util_logbase2_ceil(pipelineStackSize);
   if (stack_size_log2 < 10)
      stack_size_log2 = 10;

   if (rt->scratch.layout.total_size == 1 << stack_size_log2)
      return;

   brw_rt_compute_scratch_layout(&rt->scratch.layout, device->info,
                                 stack_ids_per_dss, 1 << stack_size_log2);

   unsigned bucket = stack_size_log2 - 10;
   assert(bucket < ARRAY_SIZE(device->rt_scratch_bos));

   struct anv_bo *bo = p_atomic_read(&device->rt_scratch_bos[bucket]);
   if (bo == NULL) {
      struct anv_bo *new_bo;
      VkResult result = anv_device_alloc_bo(device, "RT scratch",
                                            rt->scratch.layout.total_size,
                                            ANV_BO_ALLOC_INTERNAL, /* alloc_flags */
                                            0, /* explicit_address */
                                            &new_bo);
      if (result != VK_SUCCESS) {
         rt->scratch.layout.total_size = 0;
         anv_batch_set_error(&cmd_buffer->batch, result);
         return;
      }

      bo = p_atomic_cmpxchg(&device->rt_scratch_bos[bucket], NULL, new_bo);
      if (bo != NULL) {
         /* Another thread won the race; drop our allocation and use theirs. */
         anv_device_release_bo(device, new_bo);
      } else {
         bo = new_bo;
      }
   }

   rt->scratch.bo = bo;
}
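/* The rt_scratch_bos array used above acts as a per-device cache of RT
 * scratch BOs, bucketed by log2 of the stack size (bucket 0 corresponds to
 * the 1 KiB minimum, so bucket = stack_size_log2 - 10).  Allocation is
 * lock-free: a thread that loses the p_atomic_cmpxchg race frees its freshly
 * allocated BO and adopts the winner's, so at most one BO per bucket ever
 * lives in the cache.
 */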
void
anv_cmd_buffer_save_state(struct anv_cmd_buffer *cmd_buffer,
                          uint32_t flags,
                          struct anv_cmd_saved_state *state)
{
   state->flags = flags;

   /* We only support the compute pipeline at the moment. */
   assert(state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE);
   const struct anv_cmd_pipeline_state *pipe_state =
      &cmd_buffer->state.compute.base;

   if (state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE)
      state->pipeline = pipe_state->pipeline;

   if (state->flags & ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0)
      state->descriptor_set[0] = pipe_state->descriptors[0];

   if (state->flags & ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL) {
      for (uint32_t i = 0; i < MAX_SETS; i++) {
         state->descriptor_set[i] = pipe_state->descriptors[i];
      }
   }

   if (state->flags & ANV_CMD_SAVED_STATE_PUSH_CONSTANTS) {
      memcpy(state->push_constants, pipe_state->push_constants.client_data,
             sizeof(state->push_constants));
   }
}
void
anv_cmd_buffer_restore_state(struct anv_cmd_buffer *cmd_buffer,
                             struct anv_cmd_saved_state *state)
{
   VkCommandBuffer cmd_buffer_ = anv_cmd_buffer_to_handle(cmd_buffer);

   assert(state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE);
   const VkPipelineBindPoint bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
   const VkShaderStageFlags stage_flags = VK_SHADER_STAGE_COMPUTE_BIT;
   struct anv_cmd_pipeline_state *pipe_state = &cmd_buffer->state.compute.base;

   if (state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE) {
      if (state->pipeline) {
         anv_CmdBindPipeline(cmd_buffer_, bind_point,
                             anv_pipeline_to_handle(state->pipeline));
      } else {
         pipe_state->pipeline = NULL;
      }
   }

   if (state->flags & ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0) {
      if (state->descriptor_set[0]) {
         anv_cmd_buffer_bind_descriptor_set(cmd_buffer, bind_point, NULL, 0,
                                            state->descriptor_set[0], NULL,
                                            NULL);
      } else {
         pipe_state->descriptors[0] = NULL;
      }
   }

   if (state->flags & ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL) {
      for (uint32_t i = 0; i < MAX_SETS; i++) {
         if (state->descriptor_set[i]) {
            anv_cmd_buffer_bind_descriptor_set(cmd_buffer, bind_point, NULL, i,
                                               state->descriptor_set[i], NULL,
                                               NULL);
         } else {
            pipe_state->descriptors[i] = NULL;
         }
      }
   }

   if (state->flags & ANV_CMD_SAVED_STATE_PUSH_CONSTANTS) {
      VkPushConstantsInfoKHR push_info = {
         .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
         .layout = VK_NULL_HANDLE,
         .stageFlags = stage_flags,
         .offset = 0,
         .size = sizeof(state->push_constants),
         .pValues = state->push_constants,
      };
      anv_CmdPushConstants2KHR(cmd_buffer_, &push_info);
   }
}
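/* Save/restore is intended for internal (meta) operations that need to bind
 * their own compute pipeline and descriptors without clobbering the
 * application's state.  A usage sketch, assuming a hypothetical internal
 * dispatch helper emit_internal_dispatch() that is not part of this file:
 *
 *    struct anv_cmd_saved_state saved;
 *    anv_cmd_buffer_save_state(cmd_buffer,
 *                              ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
 *                              ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0 |
 *                              ANV_CMD_SAVED_STATE_PUSH_CONSTANTS,
 *                              &saved);
 *    emit_internal_dispatch(cmd_buffer);   // hypothetical helper
 *    anv_cmd_buffer_restore_state(cmd_buffer, &saved);
 */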
void
anv_cmd_write_buffer_cp(VkCommandBuffer commandBuffer,
                        VkDeviceAddress dstAddr,
                        void *data,
                        uint32_t size)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   anv_genX(cmd_buffer->device->info, cmd_write_buffer_cp)(cmd_buffer, dstAddr,
                                                           data, size);
}

void
anv_cmd_flush_buffer_write_cp(VkCommandBuffer commandBuffer)
{
   /* TODO: cmd_write_buffer_cp is implemented with MI store +
    * ForceWriteCompletionCheck so that should make the content globally
    * observable.
    *
    * If we encounter any functional or perf bottleneck issues, let's revisit
    * this helper and add ANV_PIPE_HDC_PIPELINE_FLUSH_BIT +
    * ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT +
    * ANV_PIPE_DATA_CACHE_FLUSH_BIT.
    */
}

void
anv_cmd_dispatch_unaligned(VkCommandBuffer commandBuffer,
                           uint32_t invocations_x,
                           uint32_t invocations_y,
                           uint32_t invocations_z)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   anv_genX(cmd_buffer->device->info, cmd_dispatch_unaligned)
      (commandBuffer, invocations_x, invocations_y, invocations_z);
}