1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 
30 #include "anv_private.h"
31 #include "anv_measure.h"
32 
33 #include "vk_util.h"
34 
35 /** \file anv_cmd_buffer.c
36  *
37  * This file contains all of the stuff for emitting commands into a command
38  * buffer.  This includes implementations of most of the vkCmd*
39  * entrypoints.  This file is concerned entirely with state emission and
40  * not with the command buffer data structure itself.  As far as this file
41  * is concerned, most of anv_cmd_buffer is magic.
42  */
43 
44 static void
45 anv_cmd_state_init(struct anv_cmd_buffer *cmd_buffer)
46 {
47    struct anv_cmd_state *state = &cmd_buffer->state;
48 
49    memset(state, 0, sizeof(*state));
50 
51    state->current_pipeline = UINT32_MAX;
52    state->gfx.restart_index = UINT32_MAX;
53    state->gfx.object_preemption = true;
54    state->gfx.dirty = 0;
55 
56    memcpy(state->gfx.dyn_state.dirty,
57           cmd_buffer->device->gfx_dirty_state,
58           sizeof(state->gfx.dyn_state.dirty));
59 }
60 
61 static void
62 anv_cmd_pipeline_state_finish(struct anv_cmd_buffer *cmd_buffer,
63                               struct anv_cmd_pipeline_state *pipe_state)
64 {
65    anv_push_descriptor_set_finish(&pipe_state->push_descriptor);
66 }
67 
68 static void
69 anv_cmd_state_finish(struct anv_cmd_buffer *cmd_buffer)
70 {
71    struct anv_cmd_state *state = &cmd_buffer->state;
72 
73    anv_cmd_pipeline_state_finish(cmd_buffer, &state->gfx.base);
74    anv_cmd_pipeline_state_finish(cmd_buffer, &state->compute.base);
75 }
76 
77 static void
78 anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer)
79 {
80    anv_cmd_state_finish(cmd_buffer);
81    anv_cmd_state_init(cmd_buffer);
82 
83    cmd_buffer->last_compute_walker = NULL;
84    cmd_buffer->last_indirect_dispatch = NULL;
85 }
86 
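/*
 * Lazily create a companion command buffer on the render engine (RCS). It is
 * allocated from the device's shared companion RCS command pool under the
 * device mutex and inherits the level of the primary command buffer.
 */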
87 VkResult
88 anv_cmd_buffer_ensure_rcs_companion(struct anv_cmd_buffer *cmd_buffer)
89 {
90    if (cmd_buffer->companion_rcs_cmd_buffer)
91       return VK_SUCCESS;
92 
93    VkResult result = VK_SUCCESS;
94    pthread_mutex_lock(&cmd_buffer->device->mutex);
95    VK_FROM_HANDLE(vk_command_pool, pool,
96                   cmd_buffer->device->companion_rcs_cmd_pool);
97    assert(pool != NULL);
98 
99    struct vk_command_buffer *tmp_cmd_buffer = NULL;
100    result = pool->command_buffer_ops->create(pool, &tmp_cmd_buffer);
101 
102    if (result != VK_SUCCESS)
103       goto unlock_and_return;
104 
105    cmd_buffer->companion_rcs_cmd_buffer =
106       container_of(tmp_cmd_buffer, struct anv_cmd_buffer, vk);
107    cmd_buffer->companion_rcs_cmd_buffer->vk.level = cmd_buffer->vk.level;
108    cmd_buffer->companion_rcs_cmd_buffer->is_companion_rcs_cmd_buffer = true;
109 
110 unlock_and_return:
111    pthread_mutex_unlock(&cmd_buffer->device->mutex);
112    return result;
113 }
114 
115 static VkResult
116 anv_create_cmd_buffer(struct vk_command_pool *pool,
117                       struct vk_command_buffer **cmd_buffer_out)
118 {
119    struct anv_device *device =
120       container_of(pool->base.device, struct anv_device, vk);
121    struct anv_cmd_buffer *cmd_buffer;
122    VkResult result;
123 
124    cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8,
125                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
126    if (cmd_buffer == NULL)
127       return vk_error(pool, VK_ERROR_OUT_OF_HOST_MEMORY);
128 
129    result = vk_command_buffer_init(pool, &cmd_buffer->vk,
130                                    &anv_cmd_buffer_ops, 0);
131    if (result != VK_SUCCESS)
132       goto fail_alloc;
133 
134    cmd_buffer->vk.dynamic_graphics_state.ms.sample_locations =
135       &cmd_buffer->state.gfx.sample_locations;
136    cmd_buffer->vk.dynamic_graphics_state.vi =
137       &cmd_buffer->state.gfx.vertex_input;
138 
139    cmd_buffer->batch.status = VK_SUCCESS;
140    cmd_buffer->generation.batch.status = VK_SUCCESS;
141 
142    cmd_buffer->device = device;
143 
144    assert(pool->queue_family_index < device->physical->queue.family_count);
145    cmd_buffer->queue_family =
146       &device->physical->queue.families[pool->queue_family_index];
147 
148    result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer);
149    if (result != VK_SUCCESS)
150       goto fail_vk;
151 
152    anv_state_stream_init(&cmd_buffer->surface_state_stream,
153                          &device->internal_surface_state_pool, 4096);
154    anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
155                          &device->dynamic_state_pool, 16384);
156    anv_state_stream_init(&cmd_buffer->general_state_stream,
157                          &device->general_state_pool, 16384);
158    anv_state_stream_init(&cmd_buffer->indirect_push_descriptor_stream,
159                          &device->indirect_push_descriptor_pool, 4096);
160 
161    int success = u_vector_init_pow2(&cmd_buffer->dynamic_bos, 8,
162                                     sizeof(struct anv_bo *));
163    if (!success)
164       goto fail_batch_bo;
165 
166    cmd_buffer->self_mod_locations = NULL;
167    cmd_buffer->companion_rcs_cmd_buffer = NULL;
168    cmd_buffer->is_companion_rcs_cmd_buffer = false;
169 
170    cmd_buffer->generation.jump_addr = ANV_NULL_ADDRESS;
171    cmd_buffer->generation.return_addr = ANV_NULL_ADDRESS;
172 
173    cmd_buffer->last_compute_walker = NULL;
174    cmd_buffer->last_indirect_dispatch = NULL;
175 
176    memset(&cmd_buffer->generation.shader_state, 0,
177           sizeof(cmd_buffer->generation.shader_state));
178 
179    anv_cmd_state_init(cmd_buffer);
180 
181    anv_measure_init(cmd_buffer);
182 
183    u_trace_init(&cmd_buffer->trace, &device->ds.trace_context);
184 
185    *cmd_buffer_out = &cmd_buffer->vk;
186 
187    return VK_SUCCESS;
188 
189  fail_batch_bo:
190    anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);
191  fail_vk:
192    vk_command_buffer_finish(&cmd_buffer->vk);
193  fail_alloc:
194    vk_free2(&device->vk.alloc, &pool->alloc, cmd_buffer);
195 
196    return result;
197 }
198 
199 static void
200 destroy_cmd_buffer(struct anv_cmd_buffer *cmd_buffer)
201 {
202    u_trace_fini(&cmd_buffer->trace);
203 
204    anv_measure_destroy(cmd_buffer);
205 
206    anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);
207 
208    anv_state_stream_finish(&cmd_buffer->surface_state_stream);
209    anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
210    anv_state_stream_finish(&cmd_buffer->general_state_stream);
211    anv_state_stream_finish(&cmd_buffer->indirect_push_descriptor_stream);
212 
213    while (u_vector_length(&cmd_buffer->dynamic_bos) > 0) {
214       struct anv_bo **bo = u_vector_remove(&cmd_buffer->dynamic_bos);
215       anv_bo_pool_free((*bo)->map != NULL ?
216                        &cmd_buffer->device->batch_bo_pool :
217                        &cmd_buffer->device->bvh_bo_pool, *bo);
218    }
219    u_vector_finish(&cmd_buffer->dynamic_bos);
220 
221    anv_cmd_state_finish(cmd_buffer);
222 
223    vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer->self_mod_locations);
224 
225    vk_command_buffer_finish(&cmd_buffer->vk);
226    vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer);
227 }
228 
229 static void
230 anv_cmd_buffer_destroy(struct vk_command_buffer *vk_cmd_buffer)
231 {
232    struct anv_cmd_buffer *cmd_buffer =
233       container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
234    struct anv_device *device = cmd_buffer->device;
235 
236    pthread_mutex_lock(&device->mutex);
237    if (cmd_buffer->companion_rcs_cmd_buffer) {
238       destroy_cmd_buffer(cmd_buffer->companion_rcs_cmd_buffer);
239       cmd_buffer->companion_rcs_cmd_buffer = NULL;
240    }
241 
242    ANV_RMV(cmd_buffer_destroy, cmd_buffer->device, cmd_buffer);
243 
244    destroy_cmd_buffer(cmd_buffer);
245    pthread_mutex_unlock(&device->mutex);
246 }
247 
248 static void
249 reset_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
250                  UNUSED VkCommandBufferResetFlags flags)
251 {
252    vk_command_buffer_reset(&cmd_buffer->vk);
253 
254    cmd_buffer->usage_flags = 0;
255    cmd_buffer->perf_query_pool = NULL;
256    cmd_buffer->is_companion_rcs_cmd_buffer = false;
257    anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer);
258    anv_cmd_state_reset(cmd_buffer);
259 
260    memset(&cmd_buffer->generation.shader_state, 0,
261           sizeof(cmd_buffer->generation.shader_state));
262 
263    cmd_buffer->generation.jump_addr = ANV_NULL_ADDRESS;
264    cmd_buffer->generation.return_addr = ANV_NULL_ADDRESS;
265 
266    anv_state_stream_finish(&cmd_buffer->surface_state_stream);
267    anv_state_stream_init(&cmd_buffer->surface_state_stream,
268                          &cmd_buffer->device->internal_surface_state_pool, 4096);
269 
270    anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
271    anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
272                          &cmd_buffer->device->dynamic_state_pool, 16384);
273 
274    anv_state_stream_finish(&cmd_buffer->general_state_stream);
275    anv_state_stream_init(&cmd_buffer->general_state_stream,
276                          &cmd_buffer->device->general_state_pool, 16384);
277 
278    anv_state_stream_finish(&cmd_buffer->indirect_push_descriptor_stream);
279    anv_state_stream_init(&cmd_buffer->indirect_push_descriptor_stream,
280                          &cmd_buffer->device->indirect_push_descriptor_pool,
281                          4096);
282 
283    while (u_vector_length(&cmd_buffer->dynamic_bos) > 0) {
284       struct anv_bo **bo = u_vector_remove(&cmd_buffer->dynamic_bos);
285       anv_device_release_bo(cmd_buffer->device, *bo);
286    }
287 
288    anv_measure_reset(cmd_buffer);
289 
290    u_trace_fini(&cmd_buffer->trace);
291    u_trace_init(&cmd_buffer->trace, &cmd_buffer->device->ds.trace_context);
292 }
293 
294 void
295 anv_cmd_buffer_reset(struct vk_command_buffer *vk_cmd_buffer,
296                      UNUSED VkCommandBufferResetFlags flags)
297 {
298    struct anv_cmd_buffer *cmd_buffer =
299       container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
300 
301    if (cmd_buffer->companion_rcs_cmd_buffer) {
302       reset_cmd_buffer(cmd_buffer->companion_rcs_cmd_buffer, flags);
303       destroy_cmd_buffer(cmd_buffer->companion_rcs_cmd_buffer);
304       cmd_buffer->companion_rcs_cmd_buffer = NULL;
305    }
306 
307    ANV_RMV(cmd_buffer_destroy, cmd_buffer->device, cmd_buffer);
308 
309    reset_cmd_buffer(cmd_buffer, flags);
310 }
311 
312 const struct vk_command_buffer_ops anv_cmd_buffer_ops = {
313    .create = anv_create_cmd_buffer,
314    .reset = anv_cmd_buffer_reset,
315    .destroy = anv_cmd_buffer_destroy,
316 };
317 
318 void
319 anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
320 {
321    const struct intel_device_info *devinfo = cmd_buffer->device->info;
322    anv_genX(devinfo, cmd_buffer_emit_state_base_address)(cmd_buffer);
323 }
324 
325 void
326 anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
327                                   const struct anv_image *image,
328                                   VkImageAspectFlagBits aspect,
329                                   enum isl_aux_usage aux_usage,
330                                   uint32_t level,
331                                   uint32_t base_layer,
332                                   uint32_t layer_count)
333 {
334    const struct intel_device_info *devinfo = cmd_buffer->device->info;
335    anv_genX(devinfo, cmd_buffer_mark_image_written)(cmd_buffer, image,
336                                                     aspect, aux_usage,
337                                                     level, base_layer,
338                                                     layer_count);
339 }
340 
341 void
342 anv_cmd_buffer_mark_image_fast_cleared(struct anv_cmd_buffer *cmd_buffer,
343                                        const struct anv_image *image,
344                                        const enum isl_format format,
345                                        union isl_color_value clear_color)
346 {
347    const struct intel_device_info *devinfo = cmd_buffer->device->info;
348    anv_genX(devinfo, set_fast_clear_state)(cmd_buffer, image, format,
349                                            clear_color);
350 }
351 
352 void
353 anv_cmd_buffer_load_clear_color_from_image(struct anv_cmd_buffer *cmd_buffer,
354                                            struct anv_state state,
355                                            const struct anv_image *image)
356 {
357    const struct intel_device_info *devinfo = cmd_buffer->device->info;
358    anv_genX(devinfo, load_image_clear_color)(cmd_buffer, state, image);
359 }
360 
361 void
362 anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer)
363 {
364    const struct intel_device_info *devinfo = cmd_buffer->device->info;
365    anv_genX(devinfo, cmd_emit_conditional_render_predicate)(cmd_buffer);
366 }
367 
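/*
 * Clear pending query-write bits whose corresponding cache flushes or stalls
 * have just been emitted.
 */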
368 static void
369 clear_pending_query_bits(enum anv_query_bits *query_bits,
370                          enum anv_pipe_bits flushed_bits)
371 {
372    if (flushed_bits & ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
373       *query_bits &= ~ANV_QUERY_WRITES_RT_FLUSH;
374 
375    if (flushed_bits & ANV_PIPE_TILE_CACHE_FLUSH_BIT)
376       *query_bits &= ~ANV_QUERY_WRITES_TILE_FLUSH;
377 
378    if ((flushed_bits & ANV_PIPE_DATA_CACHE_FLUSH_BIT) &&
379        (flushed_bits & ANV_PIPE_HDC_PIPELINE_FLUSH_BIT) &&
380        (flushed_bits & ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT))
381       *query_bits &= ~ANV_QUERY_WRITES_DATA_FLUSH;
382 
383    /* Once the RT/TILE/DATA writes have been flushed, we can consider the CS_STALL flush */
384    if ((*query_bits & (ANV_QUERY_WRITES_TILE_FLUSH |
385                        ANV_QUERY_WRITES_RT_FLUSH |
386                        ANV_QUERY_WRITES_DATA_FLUSH)) == 0 &&
387        (flushed_bits & (ANV_PIPE_END_OF_PIPE_SYNC_BIT | ANV_PIPE_CS_STALL_BIT)))
388       *query_bits &= ~ANV_QUERY_WRITES_CS_STALL;
389 }
390 
391 void
392 anv_cmd_buffer_update_pending_query_bits(struct anv_cmd_buffer *cmd_buffer,
393                                          enum anv_pipe_bits flushed_bits)
394 {
395    clear_pending_query_bits(&cmd_buffer->state.queries.clear_bits, flushed_bits);
396    clear_pending_query_bits(&cmd_buffer->state.queries.buffer_write_bits, flushed_bits);
397 }
398 
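/* Copy src over dst and report whether the contents actually changed. */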
399 static bool
400 mem_update(void *dst, const void *src, size_t size)
401 {
402    if (memcmp(dst, src, size) == 0)
403       return false;
404 
405    memcpy(dst, src, size);
406    return true;
407 }
408 
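/*
 * Compare the new bind map's surface/sampler/push SHA1s against the ones last
 * seen for this stage and flag descriptors or push constants dirty on change.
 */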
409 static void
410 set_dirty_for_bind_map(struct anv_cmd_buffer *cmd_buffer,
411                        gl_shader_stage stage,
412                        const struct anv_pipeline_bind_map *map)
413 {
414    assert(stage < ARRAY_SIZE(cmd_buffer->state.surface_sha1s));
415    if (mem_update(cmd_buffer->state.surface_sha1s[stage],
416                   map->surface_sha1, sizeof(map->surface_sha1)))
417       cmd_buffer->state.descriptors_dirty |= mesa_to_vk_shader_stage(stage);
418 
419    assert(stage < ARRAY_SIZE(cmd_buffer->state.sampler_sha1s));
420    if (mem_update(cmd_buffer->state.sampler_sha1s[stage],
421                   map->sampler_sha1, sizeof(map->sampler_sha1)))
422       cmd_buffer->state.descriptors_dirty |= mesa_to_vk_shader_stage(stage);
423 
424    assert(stage < ARRAY_SIZE(cmd_buffer->state.push_sha1s));
425    if (mem_update(cmd_buffer->state.push_sha1s[stage],
426                   map->push_sha1, sizeof(map->push_sha1)))
427       cmd_buffer->state.push_constants_dirty |= mesa_to_vk_shader_stage(stage);
428 }
429 
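/*
 * Make sure a shadow stack BO large enough for the pipeline's ray queries is
 * bound (shared per device in power-of-two buckets), add it and the HW ray
 * query BO to the relocation list, and point the push constants at the ray
 * query globals.
 */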
430 static void
431 anv_cmd_buffer_set_ray_query_buffer(struct anv_cmd_buffer *cmd_buffer,
432                                     struct anv_cmd_pipeline_state *pipeline_state,
433                                     struct anv_pipeline *pipeline,
434                                     VkShaderStageFlags stages)
435 {
436    struct anv_device *device = cmd_buffer->device;
437 
438    uint64_t ray_shadow_size =
439       align64(brw_rt_ray_queries_shadow_stacks_size(device->info,
440                                                     pipeline->ray_queries),
441               4096);
442    if (ray_shadow_size > 0 &&
443        (!cmd_buffer->state.ray_query_shadow_bo ||
444         cmd_buffer->state.ray_query_shadow_bo->size < ray_shadow_size)) {
445       unsigned shadow_size_log2 = MAX2(util_logbase2_ceil(ray_shadow_size), 16);
446       unsigned bucket = shadow_size_log2 - 16;
447       assert(bucket < ARRAY_SIZE(device->ray_query_shadow_bos));
448 
449       struct anv_bo *bo = p_atomic_read(&device->ray_query_shadow_bos[bucket]);
450       if (bo == NULL) {
451          struct anv_bo *new_bo;
452          VkResult result = anv_device_alloc_bo(device, "RT queries shadow",
453                                                ray_shadow_size,
454                                                0, /* alloc_flags */
455                                                0, /* explicit_address */
456                                                &new_bo);
457          if (result != VK_SUCCESS) {
458             anv_batch_set_error(&cmd_buffer->batch, result);
459             return;
460          }
461 
462          bo = p_atomic_cmpxchg(&device->ray_query_shadow_bos[bucket], NULL, new_bo);
463          if (bo != NULL) {
464             anv_device_release_bo(device, bo);
465          } else {
466             bo = new_bo;
467          }
468       }
469       cmd_buffer->state.ray_query_shadow_bo = bo;
470 
471       /* Add the ray query buffers to the batch list. */
472       anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
473                             cmd_buffer->state.ray_query_shadow_bo);
474    }
475 
476    /* Add the HW buffer to the list of BO used. */
477    anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
478                          device->ray_query_bo);
479 
480    /* Fill the push constants & mark them dirty. */
481    struct anv_state ray_query_global_state =
482       anv_genX(device->info, cmd_buffer_ray_query_globals)(cmd_buffer);
483 
484    struct anv_address ray_query_globals_addr =
485       anv_state_pool_state_address(&device->dynamic_state_pool,
486                                    ray_query_global_state);
487    pipeline_state->push_constants.ray_query_globals =
488       anv_address_physical(ray_query_globals_addr);
489    cmd_buffer->state.push_constants_dirty |= stages;
490 }
491 
492 /**
493  * This function computes the changes between 2 pipelines and flags the dirty HW
494  * state appropriately.
495  */
496 static void
497 anv_cmd_buffer_flush_pipeline_state(struct anv_cmd_buffer *cmd_buffer,
498                                     struct anv_graphics_pipeline *old_pipeline,
499                                     struct anv_graphics_pipeline *new_pipeline)
500 {
501    struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
502    struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
503 
504 #define diff_fix_state(bit, name)                                       \
505    do {                                                                 \
506       /* Fixed states should always have matching sizes */              \
507       assert(old_pipeline == NULL ||                                    \
508              old_pipeline->name.len == new_pipeline->name.len);         \
509       /* Don't bother memcmp if the state is already dirty */           \
510       if (!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_##bit) &&         \
511           (old_pipeline == NULL ||                                      \
512            memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
513                   &new_pipeline->batch_data[new_pipeline->name.offset], \
514                   4 * new_pipeline->name.len) != 0))                    \
515          BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit);              \
516    } while (0)
517 #define diff_var_state(bit, name)                                       \
518    do {                                                                 \
519       /* Don't bother memcmp if the state is already dirty */           \
520       /* Also if the new state is empty, avoid marking dirty */         \
521       if (!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_##bit) &&         \
522           new_pipeline->name.len != 0 &&                                \
523           (old_pipeline == NULL ||                                      \
524            old_pipeline->name.len != new_pipeline->name.len ||          \
525            memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
526                   &new_pipeline->batch_data[new_pipeline->name.offset], \
527                   4 * new_pipeline->name.len) != 0))                    \
528          BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit);              \
529    } while (0)
530 #define assert_identical(bit, name)                                     \
531    do {                                                                 \
532       /* Fixed states should always have matching sizes */              \
533       assert(old_pipeline == NULL ||                                    \
534              old_pipeline->name.len == new_pipeline->name.len);         \
535       assert(old_pipeline == NULL ||                                    \
536              memcmp(&old_pipeline->batch_data[old_pipeline->name.offset], \
537                     &new_pipeline->batch_data[new_pipeline->name.offset], \
538                     4 * new_pipeline->name.len) == 0);                  \
539    } while (0)
540 #define assert_empty(name) assert(new_pipeline->name.len == 0)
541 
542    /* Compare all states, including partially packed ones; the dynamic part is
543     * left at 0 but the static part could still change.
544     */
545    diff_fix_state(URB,                      final.urb);
546    diff_fix_state(VF_SGVS,                  final.vf_sgvs);
547    if (cmd_buffer->device->info->ver >= 11)
548       diff_fix_state(VF_SGVS_2,             final.vf_sgvs_2);
549    if (cmd_buffer->device->info->ver >= 12)
550       diff_fix_state(PRIMITIVE_REPLICATION, final.primitive_replication);
551    diff_fix_state(SBE,                      final.sbe);
552    diff_fix_state(SBE_SWIZ,                 final.sbe_swiz);
553    diff_fix_state(MULTISAMPLE,              final.ms);
554    diff_fix_state(VS,                       final.vs);
555    diff_fix_state(HS,                       final.hs);
556    diff_fix_state(DS,                       final.ds);
557    diff_fix_state(PS,                       final.ps);
558 
559    diff_fix_state(CLIP,                     partial.clip);
560    diff_fix_state(SF,                       partial.sf);
561    diff_fix_state(RASTER,                   partial.raster);
562    diff_fix_state(WM,                       partial.wm);
563    diff_fix_state(STREAMOUT,                partial.so);
564    diff_fix_state(GS,                       partial.gs);
565    diff_fix_state(TE,                       partial.te);
566    diff_fix_state(VFG,                      partial.vfg);
567    diff_fix_state(PS_EXTRA,                 partial.ps_extra);
568 
569    if (cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader) {
570       diff_fix_state(TASK_CONTROL,          final.task_control);
571       diff_fix_state(TASK_SHADER,           final.task_shader);
572       diff_fix_state(TASK_REDISTRIB,        final.task_redistrib);
573       diff_fix_state(MESH_CONTROL,          final.mesh_control);
574       diff_fix_state(MESH_SHADER,           final.mesh_shader);
575       diff_fix_state(MESH_DISTRIB,          final.mesh_distrib);
576       diff_fix_state(CLIP_MESH,             final.clip_mesh);
577       diff_fix_state(SBE_MESH,              final.sbe_mesh);
578    } else {
579       assert_empty(final.task_control);
580       assert_empty(final.task_shader);
581       assert_empty(final.task_redistrib);
582       assert_empty(final.mesh_control);
583       assert_empty(final.mesh_shader);
584       assert_empty(final.mesh_distrib);
585       assert_empty(final.clip_mesh);
586       assert_empty(final.sbe_mesh);
587    }
588 
589    /* States that should never vary between pipelines, but can be affected by
590     * blorp etc...
591     */
592    assert_identical(VF_STATISTICS,            final.vf_statistics);
593 
594    /* States that can vary in length */
595    diff_var_state(VF_SGVS_INSTANCING,       final.vf_sgvs_instancing);
596    diff_var_state(SO_DECL_LIST,             final.so_decl_list);
597 
598 #undef diff_fix_state
599 #undef diff_var_state
600 #undef assert_identical
601 #undef assert_empty
602 
603    /* We're not diffing the following:
604     *    - anv_graphics_pipeline::vertex_input_data
605     *    - anv_graphics_pipeline::final::vf_instancing
606     *
607     * since they are tracked by the runtime.
608     */
609 }
610 
611 void anv_CmdBindPipeline(
612     VkCommandBuffer                             commandBuffer,
613     VkPipelineBindPoint                         pipelineBindPoint,
614     VkPipeline                                  _pipeline)
615 {
616    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
617    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
618    struct anv_cmd_pipeline_state *state;
619    VkShaderStageFlags stages = 0;
620 
621    switch (pipelineBindPoint) {
622    case VK_PIPELINE_BIND_POINT_COMPUTE: {
623       if (cmd_buffer->state.compute.base.pipeline == pipeline)
624          return;
625 
626       cmd_buffer->state.compute.base.pipeline = pipeline;
627       cmd_buffer->state.compute.pipeline_dirty = true;
628 
629       struct anv_compute_pipeline *compute_pipeline =
630          anv_pipeline_to_compute(pipeline);
631       set_dirty_for_bind_map(cmd_buffer, MESA_SHADER_COMPUTE,
632                              &compute_pipeline->cs->bind_map);
633 
634       state = &cmd_buffer->state.compute.base;
635       stages = VK_SHADER_STAGE_COMPUTE_BIT;
636       break;
637    }
638 
639    case VK_PIPELINE_BIND_POINT_GRAPHICS: {
640       struct anv_graphics_pipeline *new_pipeline =
641          anv_pipeline_to_graphics(pipeline);
642 
643       /* Apply the non-dynamic state from the pipeline */
644       vk_cmd_set_dynamic_graphics_state(&cmd_buffer->vk,
645                                         &new_pipeline->dynamic_state);
646 
647       if (cmd_buffer->state.gfx.base.pipeline == pipeline)
648          return;
649 
650       struct anv_graphics_pipeline *old_pipeline =
651          cmd_buffer->state.gfx.base.pipeline == NULL ? NULL :
652          anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
653 
654       cmd_buffer->state.gfx.base.pipeline = pipeline;
655       cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE;
656 
657       anv_foreach_stage(stage, new_pipeline->base.base.active_stages) {
658          set_dirty_for_bind_map(cmd_buffer, stage,
659                                 &new_pipeline->base.shaders[stage]->bind_map);
660       }
661 
662       state = &cmd_buffer->state.gfx.base;
663       stages = new_pipeline->base.base.active_stages;
664 
665 
666       /* When the pipeline is using independent states and dynamic buffers,
667        * this will trigger an update of anv_push_constants::dynamic_base_index
668        * & anv_push_constants::dynamic_offsets.
669        */
670       struct anv_push_constants *push =
671          &cmd_buffer->state.gfx.base.push_constants;
672       struct anv_pipeline_sets_layout *layout = &new_pipeline->base.base.layout;
673       if (layout->independent_sets && layout->num_dynamic_buffers > 0) {
674          bool modified = false;
675          for (uint32_t s = 0; s < layout->num_sets; s++) {
676             if (layout->set[s].layout == NULL)
677                continue;
678 
679             assert(layout->set[s].dynamic_offset_start < MAX_DYNAMIC_BUFFERS);
680             if (layout->set[s].layout->dynamic_offset_count > 0 &&
681                 (push->desc_surface_offsets[s] & ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK) !=
682                 layout->set[s].dynamic_offset_start) {
683                push->desc_surface_offsets[s] &= ~ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK;
684                push->desc_surface_offsets[s] |= (layout->set[s].dynamic_offset_start &
685                                                  ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
686                modified = true;
687             }
688          }
689          if (modified)
690             cmd_buffer->state.push_constants_dirty |= stages;
691       }
692 
693       if ((new_pipeline->fs_msaa_flags & INTEL_MSAA_FLAG_ENABLE_DYNAMIC) &&
694           push->gfx.fs_msaa_flags != new_pipeline->fs_msaa_flags) {
695          push->gfx.fs_msaa_flags = new_pipeline->fs_msaa_flags;
696          cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
697       }
698       if (new_pipeline->dynamic_patch_control_points) {
699          cmd_buffer->state.push_constants_dirty |=
700             VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
701       }
702 
703       anv_cmd_buffer_flush_pipeline_state(cmd_buffer, old_pipeline, new_pipeline);
704       break;
705    }
706 
707    case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: {
708       if (cmd_buffer->state.rt.base.pipeline == pipeline)
709          return;
710 
711       cmd_buffer->state.rt.base.pipeline = pipeline;
712       cmd_buffer->state.rt.pipeline_dirty = true;
713 
714       struct anv_ray_tracing_pipeline *rt_pipeline =
715          anv_pipeline_to_ray_tracing(pipeline);
716       if (rt_pipeline->stack_size > 0) {
717          anv_CmdSetRayTracingPipelineStackSizeKHR(commandBuffer,
718                                                   rt_pipeline->stack_size);
719       }
720 
721       state = &cmd_buffer->state.rt.base;
722       break;
723    }
724 
725    default:
726       unreachable("invalid bind point");
727       break;
728    }
729 
730    if (pipeline->ray_queries > 0)
731       anv_cmd_buffer_set_ray_query_buffer(cmd_buffer, state, pipeline, stages);
732 }
733 
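/*
 * Bind one descriptor set for the given bind point: record it in the
 * per-bind-point state, update the descriptor buffer offsets in the push
 * constants when required, consume the matching dynamic offsets and flag the
 * affected shader stages dirty.
 */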
734 static void
735 anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
736                                    VkPipelineBindPoint bind_point,
737                                    struct anv_pipeline_sets_layout *layout,
738                                    uint32_t set_index,
739                                    struct anv_descriptor_set *set,
740                                    uint32_t *dynamic_offset_count,
741                                    const uint32_t **dynamic_offsets)
742 {
743    /* Either we have no pool because it's a push descriptor or the pool is not
744     * host only:
745     *
746     * VUID-vkCmdBindDescriptorSets-pDescriptorSets-04616:
747     *
748     *    "Each element of pDescriptorSets must not have been allocated from a
749     *     VkDescriptorPool with the
750     *     VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT flag set"
751     */
752    assert(!set->pool || !set->pool->host_only);
753 
754    struct anv_descriptor_set_layout *set_layout = set->layout;
755    VkShaderStageFlags stages = set_layout->shader_stages;
756    struct anv_cmd_pipeline_state *pipe_state;
757 
758    switch (bind_point) {
759    case VK_PIPELINE_BIND_POINT_GRAPHICS:
760       stages &= VK_SHADER_STAGE_ALL_GRAPHICS |
761                 (cmd_buffer->device->vk.enabled_extensions.EXT_mesh_shader ?
762                       (VK_SHADER_STAGE_TASK_BIT_EXT |
763                        VK_SHADER_STAGE_MESH_BIT_EXT) : 0);
764       pipe_state = &cmd_buffer->state.gfx.base;
765       break;
766 
767    case VK_PIPELINE_BIND_POINT_COMPUTE:
768       stages &= VK_SHADER_STAGE_COMPUTE_BIT;
769       pipe_state = &cmd_buffer->state.compute.base;
770       break;
771 
772    case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
773       stages &= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
774                 VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
775                 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
776                 VK_SHADER_STAGE_MISS_BIT_KHR |
777                 VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
778                 VK_SHADER_STAGE_CALLABLE_BIT_KHR;
779       pipe_state = &cmd_buffer->state.rt.base;
780       break;
781 
782    default:
783       unreachable("invalid bind point");
784    }
785 
786    VkShaderStageFlags dirty_stages = 0;
787    /* If it's a push descriptor set, we have to flag things as dirty
788     * regardless of whether or not the CPU-side data structure changed as we
789     * may have edited in-place.
790     */
791    if (pipe_state->descriptors[set_index] != set ||
792          anv_descriptor_set_is_push(set)) {
793       pipe_state->descriptors[set_index] = set;
794 
795       /* When using indirect descriptors, stages that have access to the HW
796        * binding tables never need to access the
797        * anv_push_constants::desc_surface_offsets fields, because any data
798        * they need from the descriptor buffer is accessible through a binding
799        * table entry. For stages that are "bindless" (Mesh/Task/RT), we need
800        * to provide anv_push_constants::desc_surface_offsets matching the bound
801        * descriptor so that shaders can access the descriptor buffer through
802        * A64 messages.
803        *
804        * With direct descriptors, the shaders can use the
805        * anv_push_constants::desc_surface_offsets to build bindless offsets.
806        * So we always need to update the push constant data.
807        */
808       bool update_desc_sets =
809          !cmd_buffer->device->physical->indirect_descriptors ||
810          (stages & (VK_SHADER_STAGE_TASK_BIT_EXT |
811                     VK_SHADER_STAGE_MESH_BIT_EXT |
812                     VK_SHADER_STAGE_RAYGEN_BIT_KHR |
813                     VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
814                     VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
815                     VK_SHADER_STAGE_MISS_BIT_KHR |
816                     VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
817                     VK_SHADER_STAGE_CALLABLE_BIT_KHR));
818 
819       if (update_desc_sets) {
820          struct anv_push_constants *push = &pipe_state->push_constants;
821 
822          uint64_t offset =
823             anv_address_physical(set->desc_surface_addr) -
824             cmd_buffer->device->physical->va.internal_surface_state_pool.addr;
825          assert((offset & ~ANV_DESCRIPTOR_SET_OFFSET_MASK) == 0);
826          push->desc_surface_offsets[set_index] &= ~ANV_DESCRIPTOR_SET_OFFSET_MASK;
827          push->desc_surface_offsets[set_index] |= offset;
828          push->desc_sampler_offsets[set_index] |=
829             anv_address_physical(set->desc_sampler_addr) -
830             cmd_buffer->device->physical->va.dynamic_state_pool.addr;
831 
832          anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
833                                set->desc_surface_addr.bo);
834          anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
835                                set->desc_sampler_addr.bo);
836       }
837 
838       dirty_stages |= stages;
839    }
840 
841    if (dynamic_offsets) {
842       if (set_layout->dynamic_offset_count > 0) {
843          struct anv_push_constants *push = &pipe_state->push_constants;
844          uint32_t dynamic_offset_start =
845             layout->set[set_index].dynamic_offset_start;
846          uint32_t *push_offsets =
847             &push->dynamic_offsets[dynamic_offset_start];
848 
849          memcpy(pipe_state->dynamic_offsets[set_index].offsets,
850                 *dynamic_offsets,
851                 sizeof(uint32_t) * MIN2(*dynamic_offset_count,
852                                         set_layout->dynamic_offset_count));
853 
854          /* Assert that everything is in range */
855          assert(set_layout->dynamic_offset_count <= *dynamic_offset_count);
856          assert(dynamic_offset_start + set_layout->dynamic_offset_count <=
857                 ARRAY_SIZE(push->dynamic_offsets));
858 
859          for (uint32_t i = 0; i < set_layout->dynamic_offset_count; i++) {
860             if (push_offsets[i] != (*dynamic_offsets)[i]) {
861                pipe_state->dynamic_offsets[set_index].offsets[i] =
862                   push_offsets[i] = (*dynamic_offsets)[i];
863                /* dynamic_offset_stages[] elements could contain blanket
864                 * values like VK_SHADER_STAGE_ALL, so limit this to the
865                 * binding point's bits.
866                 */
867                dirty_stages |= set_layout->dynamic_offset_stages[i] & stages;
868             }
869          }
870 
871          *dynamic_offsets += set_layout->dynamic_offset_count;
872          *dynamic_offset_count -= set_layout->dynamic_offset_count;
873       }
874    }
875 
876    if (set->is_push)
877       cmd_buffer->state.push_descriptors_dirty |= dirty_stages;
878    else
879       cmd_buffer->state.descriptors_dirty |= dirty_stages;
880    cmd_buffer->state.push_constants_dirty |= dirty_stages;
881 }
882 
883 #define ANV_GRAPHICS_STAGE_BITS \
884    (VK_SHADER_STAGE_ALL_GRAPHICS | \
885     VK_SHADER_STAGE_MESH_BIT_EXT | \
886     VK_SHADER_STAGE_TASK_BIT_EXT)
887 
888 #define ANV_RT_STAGE_BITS \
889    (VK_SHADER_STAGE_RAYGEN_BIT_KHR | \
890     VK_SHADER_STAGE_ANY_HIT_BIT_KHR | \
891     VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | \
892     VK_SHADER_STAGE_MISS_BIT_KHR | \
893     VK_SHADER_STAGE_INTERSECTION_BIT_KHR | \
894     VK_SHADER_STAGE_CALLABLE_BIT_KHR)
895 
896 void anv_CmdBindDescriptorSets2KHR(
897     VkCommandBuffer                             commandBuffer,
898     const VkBindDescriptorSetsInfoKHR*          pInfo)
899 {
900    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
901    ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pInfo->layout);
902    struct anv_pipeline_sets_layout *layout = &pipeline_layout->sets_layout;
903 
904    assert(pInfo->firstSet + pInfo->descriptorSetCount <= MAX_SETS);
905 
906    if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
907       uint32_t dynamicOffsetCount = pInfo->dynamicOffsetCount;
908       const uint32_t *pDynamicOffsets = pInfo->pDynamicOffsets;
909 
910       for (uint32_t i = 0; i < pInfo->descriptorSetCount; i++) {
911          ANV_FROM_HANDLE(anv_descriptor_set, set, pInfo->pDescriptorSets[i]);
912          if (set == NULL)
913             continue;
914          anv_cmd_buffer_bind_descriptor_set(cmd_buffer,
915                                             VK_PIPELINE_BIND_POINT_COMPUTE,
916                                             layout, pInfo->firstSet + i, set,
917                                             &dynamicOffsetCount,
918                                             &pDynamicOffsets);
919       }
920    }
921    if (pInfo->stageFlags & ANV_GRAPHICS_STAGE_BITS) {
922       uint32_t dynamicOffsetCount = pInfo->dynamicOffsetCount;
923       const uint32_t *pDynamicOffsets = pInfo->pDynamicOffsets;
924 
925       for (uint32_t i = 0; i < pInfo->descriptorSetCount; i++) {
926          ANV_FROM_HANDLE(anv_descriptor_set, set, pInfo->pDescriptorSets[i]);
927          if (set == NULL)
928             continue;
929          anv_cmd_buffer_bind_descriptor_set(cmd_buffer,
930                                             VK_PIPELINE_BIND_POINT_GRAPHICS,
931                                             layout, pInfo->firstSet + i, set,
932                                             &dynamicOffsetCount,
933                                             &pDynamicOffsets);
934       }
935    }
936    if (pInfo->stageFlags & ANV_RT_STAGE_BITS) {
937       uint32_t dynamicOffsetCount = pInfo->dynamicOffsetCount;
938       const uint32_t *pDynamicOffsets = pInfo->pDynamicOffsets;
939 
940       for (uint32_t i = 0; i < pInfo->descriptorSetCount; i++) {
941          ANV_FROM_HANDLE(anv_descriptor_set, set, pInfo->pDescriptorSets[i]);
942          if (set == NULL)
943             continue;
944          anv_cmd_buffer_bind_descriptor_set(cmd_buffer,
945                                             VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR,
946                                             layout, pInfo->firstSet + i, set,
947                                             &dynamicOffsetCount,
948                                             &pDynamicOffsets);
949       }
950    }
951 }
952 
953 void anv_CmdBindVertexBuffers2(
954    VkCommandBuffer                              commandBuffer,
955    uint32_t                                     firstBinding,
956    uint32_t                                     bindingCount,
957    const VkBuffer*                              pBuffers,
958    const VkDeviceSize*                          pOffsets,
959    const VkDeviceSize*                          pSizes,
960    const VkDeviceSize*                          pStrides)
961 {
962    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
963    struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings;
964 
965    /* We have to defer setting up vertex buffers since we need the buffer
966     * stride from the pipeline. */
967 
968    assert(firstBinding + bindingCount <= MAX_VBS);
969    for (uint32_t i = 0; i < bindingCount; i++) {
970       ANV_FROM_HANDLE(anv_buffer, buffer, pBuffers[i]);
971 
972       if (buffer == NULL) {
973          vb[firstBinding + i] = (struct anv_vertex_binding) {
974             .buffer = NULL,
975          };
976       } else {
977          vb[firstBinding + i] = (struct anv_vertex_binding) {
978             .buffer = buffer,
979             .offset = pOffsets[i],
980             .size = vk_buffer_range(&buffer->vk, pOffsets[i],
981                                     pSizes ? pSizes[i] : VK_WHOLE_SIZE),
982          };
983       }
984       cmd_buffer->state.gfx.vb_dirty |= 1 << (firstBinding + i);
985    }
986 
987    if (pStrides != NULL) {
988       vk_cmd_set_vertex_binding_strides(&cmd_buffer->vk, firstBinding,
989                                         bindingCount, pStrides);
990    }
991 }
992 
993 void anv_CmdBindTransformFeedbackBuffersEXT(
994     VkCommandBuffer                             commandBuffer,
995     uint32_t                                    firstBinding,
996     uint32_t                                    bindingCount,
997     const VkBuffer*                             pBuffers,
998     const VkDeviceSize*                         pOffsets,
999     const VkDeviceSize*                         pSizes)
1000 {
1001    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1002    struct anv_xfb_binding *xfb = cmd_buffer->state.xfb_bindings;
1003 
1004    /* We have to defer setting up the buffers since we need the buffer
1005     * stride from the pipeline. */
1006 
1007    assert(firstBinding + bindingCount <= MAX_XFB_BUFFERS);
1008    for (uint32_t i = 0; i < bindingCount; i++) {
1009       if (pBuffers[i] == VK_NULL_HANDLE) {
1010          xfb[firstBinding + i].buffer = NULL;
1011       } else {
1012          ANV_FROM_HANDLE(anv_buffer, buffer, pBuffers[i]);
1013          xfb[firstBinding + i].buffer = buffer;
1014          xfb[firstBinding + i].offset = pOffsets[i];
1015          xfb[firstBinding + i].size =
1016             vk_buffer_range(&buffer->vk, pOffsets[i],
1017                             pSizes ? pSizes[i] : VK_WHOLE_SIZE);
1018       }
1019    }
1020 }
1021 
1022 enum isl_format
1023 anv_isl_format_for_descriptor_type(const struct anv_device *device,
1024                                    VkDescriptorType type)
1025 {
1026    switch (type) {
1027    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1028    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1029       return device->physical->compiler->indirect_ubos_use_sampler ?
1030              ISL_FORMAT_R32G32B32A32_FLOAT : ISL_FORMAT_RAW;
1031 
1032    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1033    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1034       return ISL_FORMAT_RAW;
1035 
1036    default:
1037       unreachable("Invalid descriptor type");
1038    }
1039 }
1040 
1041 struct anv_state
1042 anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
1043                             const void *data, uint32_t size, uint32_t alignment)
1044 {
1045    struct anv_state state;
1046 
1047    state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);
1048    memcpy(state.map, data, size);
1049 
1050    VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size));
1051 
1052    return state;
1053 }
1054 
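/*
 * Allocate dynamic state filled with the DWORD-wise OR of two packed state
 * arrays.
 */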
1055 struct anv_state
1056 anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
1057                              uint32_t *a, uint32_t *b,
1058                              uint32_t dwords, uint32_t alignment)
1059 {
1060    struct anv_state state;
1061    uint32_t *p;
1062 
1063    state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
1064                                               dwords * 4, alignment);
1065    p = state.map;
1066    for (uint32_t i = 0; i < dwords; i++)
1067       p[i] = a[i] | b[i];
1068 
1069    VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
1070 
1071    return state;
1072 }
1073 
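/*
 * Snapshot the current graphics push constants into dynamic state memory
 * (32-byte aligned, since the bottom 5 bits of the address must be zero).
 */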
1074 struct anv_state
1075 anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer)
1076 {
1077    struct anv_push_constants *data =
1078       &cmd_buffer->state.gfx.base.push_constants;
1079 
1080    struct anv_state state =
1081       anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
1082                                          sizeof(struct anv_push_constants),
1083                                          32 /* bottom 5 bits MBZ */);
1084    memcpy(state.map, data, sizeof(struct anv_push_constants));
1085 
1086    return state;
1087 }
1088 
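/*
 * Build the compute push constant data: the cross-thread section is copied
 * once, then the per-thread section is replicated for each HW thread with its
 * subgroup ID patched in.
 */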
1089 struct anv_state
1090 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
1091 {
1092    const struct intel_device_info *devinfo = cmd_buffer->device->info;
1093    struct anv_cmd_pipeline_state *pipe_state = &cmd_buffer->state.compute.base;
1094    struct anv_push_constants *data = &pipe_state->push_constants;
1095    struct anv_compute_pipeline *pipeline =
1096       anv_pipeline_to_compute(cmd_buffer->state.compute.base.pipeline);
1097    const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
1098    const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0];
1099 
1100    const struct intel_cs_dispatch_info dispatch =
1101       brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
1102    const unsigned total_push_constants_size =
1103       brw_cs_push_const_total_size(cs_prog_data, dispatch.threads);
1104    if (total_push_constants_size == 0)
1105       return (struct anv_state) { .offset = 0 };
1106 
1107    const unsigned push_constant_alignment = 64;
1108    const unsigned aligned_total_push_constants_size =
1109       ALIGN(total_push_constants_size, push_constant_alignment);
1110    struct anv_state state;
1111    if (devinfo->verx10 >= 125) {
1112       state = anv_state_stream_alloc(&cmd_buffer->general_state_stream,
1113                                      aligned_total_push_constants_size,
1114                                      push_constant_alignment);
1115    } else {
1116       state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
1117                                                  aligned_total_push_constants_size,
1118                                                  push_constant_alignment);
1119    }
1120    if (state.map == NULL)
1121       return state;
1122 
1123    void *dst = state.map;
1124    const void *src = (char *)data + (range->start * 32);
1125 
1126    if (cs_prog_data->push.cross_thread.size > 0) {
1127       memcpy(dst, src, cs_prog_data->push.cross_thread.size);
1128       dst += cs_prog_data->push.cross_thread.size;
1129       src += cs_prog_data->push.cross_thread.size;
1130    }
1131 
1132    if (cs_prog_data->push.per_thread.size > 0) {
1133       for (unsigned t = 0; t < dispatch.threads; t++) {
1134          memcpy(dst, src, cs_prog_data->push.per_thread.size);
1135 
1136          uint32_t *subgroup_id = dst +
1137             offsetof(struct anv_push_constants, cs.subgroup_id) -
1138             (range->start * 32 + cs_prog_data->push.cross_thread.size);
1139          *subgroup_id = t;
1140 
1141          dst += cs_prog_data->push.per_thread.size;
1142       }
1143    }
1144 
1145    return state;
1146 }
1147 
1148 void anv_CmdPushConstants2KHR(
1149     VkCommandBuffer                             commandBuffer,
1150     const VkPushConstantsInfoKHR*               pInfo)
1151 {
1152    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1153 
1154    if (pInfo->stageFlags & ANV_GRAPHICS_STAGE_BITS) {
1155       struct anv_cmd_pipeline_state *pipe_state =
1156          &cmd_buffer->state.gfx.base;
1157 
1158       memcpy(pipe_state->push_constants.client_data + pInfo->offset,
1159              pInfo->pValues, pInfo->size);
1160    }
1161    if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
1162       struct anv_cmd_pipeline_state *pipe_state =
1163          &cmd_buffer->state.compute.base;
1164 
1165       memcpy(pipe_state->push_constants.client_data + pInfo->offset,
1166              pInfo->pValues, pInfo->size);
1167    }
1168    if (pInfo->stageFlags & ANV_RT_STAGE_BITS) {
1169       struct anv_cmd_pipeline_state *pipe_state =
1170          &cmd_buffer->state.rt.base;
1171 
1172       memcpy(pipe_state->push_constants.client_data + pInfo->offset,
1173              pInfo->pValues, pInfo->size);
1174    }
1175 
1176    cmd_buffer->state.push_constants_dirty |= pInfo->stageFlags;
1177 }
1178 
1179 static struct anv_cmd_pipeline_state *
1180 anv_cmd_buffer_get_pipe_state(struct anv_cmd_buffer *cmd_buffer,
1181                               VkPipelineBindPoint bind_point)
1182 {
1183    switch (bind_point) {
1184    case VK_PIPELINE_BIND_POINT_GRAPHICS:
1185       return &cmd_buffer->state.gfx.base;
1186    case VK_PIPELINE_BIND_POINT_COMPUTE:
1187       return &cmd_buffer->state.compute.base;
1188    case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
1189       return &cmd_buffer->state.rt.base;
1190       break;
1191    default:
1192       unreachable("invalid bind point");
1193    }
1194 }
1195 
1196 static void
1197 anv_cmd_buffer_push_descriptor_sets(struct anv_cmd_buffer *cmd_buffer,
1198                                     VkPipelineBindPoint bind_point,
1199                                     const VkPushDescriptorSetInfoKHR *pInfo)
1200 {
1201    ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pInfo->layout);
1202    struct anv_pipeline_sets_layout *layout = &pipeline_layout->sets_layout;
1203 
1204    assert(pInfo->set < MAX_SETS);
1205 
1206    struct anv_descriptor_set_layout *set_layout = layout->set[pInfo->set].layout;
1207 
1208    struct anv_push_descriptor_set *push_set =
1209       &anv_cmd_buffer_get_pipe_state(cmd_buffer,
1210                                      bind_point)->push_descriptor;
1211    if (!anv_push_descriptor_set_init(cmd_buffer, push_set, set_layout))
1212       return;
1213 
1214    anv_descriptor_set_write(cmd_buffer->device, &push_set->set,
1215                             pInfo->descriptorWriteCount,
1216                             pInfo->pDescriptorWrites);
1217 
1218    anv_cmd_buffer_bind_descriptor_set(cmd_buffer, bind_point,
1219                                       layout, pInfo->set, &push_set->set,
1220                                       NULL, NULL);
1221 }
1222 
1223 void anv_CmdPushDescriptorSet2KHR(
1224     VkCommandBuffer                            commandBuffer,
1225     const VkPushDescriptorSetInfoKHR*          pInfo)
1226 {
1227    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1228 
1229    if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
1230       anv_cmd_buffer_push_descriptor_sets(cmd_buffer,
1231                                           VK_PIPELINE_BIND_POINT_COMPUTE,
1232                                           pInfo);
1233    if (pInfo->stageFlags & ANV_GRAPHICS_STAGE_BITS)
1234       anv_cmd_buffer_push_descriptor_sets(cmd_buffer,
1235                                           VK_PIPELINE_BIND_POINT_GRAPHICS,
1236                                           pInfo);
1237    if (pInfo->stageFlags & ANV_RT_STAGE_BITS)
1238       anv_cmd_buffer_push_descriptor_sets(cmd_buffer,
1239                                           VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR,
1240                                           pInfo);
1241 }
1242 
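/* Same as anv_CmdPushDescriptorSet2KHR, except the descriptor data comes
 * from an update template and the bind point is taken from the template
 * itself.
 */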
void anv_CmdPushDescriptorSetWithTemplate2KHR(
    VkCommandBuffer                                commandBuffer,
    const VkPushDescriptorSetWithTemplateInfoKHR*  pInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(vk_descriptor_update_template, template,
                  pInfo->descriptorUpdateTemplate);
   ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pInfo->layout);
   struct anv_pipeline_sets_layout *layout = &pipeline_layout->sets_layout;

   assert(pInfo->set < MAX_PUSH_DESCRIPTORS);

   struct anv_descriptor_set_layout *set_layout = layout->set[pInfo->set].layout;

   struct anv_push_descriptor_set *push_set =
      &anv_cmd_buffer_get_pipe_state(cmd_buffer,
                                     template->bind_point)->push_descriptor;
   if (!anv_push_descriptor_set_init(cmd_buffer, push_set, set_layout))
      return;

   anv_descriptor_set_write_template(cmd_buffer->device, &push_set->set,
                                     template,
                                     pInfo->pData);

   anv_cmd_buffer_bind_descriptor_set(cmd_buffer, template->bind_point,
                                      layout, pInfo->set, &push_set->set,
                                      NULL, NULL);
}

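/* Size the ray tracing scratch for the requested stack size.  Stack sizes
 * are rounded up to a power of two (1KB minimum) and the matching scratch
 * BO is cached on the device, one per size bucket.
 */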
void anv_CmdSetRayTracingPipelineStackSizeKHR(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    pipelineStackSize)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;
   struct anv_device *device = cmd_buffer->device;

   if (anv_batch_has_error(&cmd_buffer->batch))
      return;

   uint32_t stack_ids_per_dss = 2048; /* TODO */

   /* Round the requested size up to a power of two, with a 1KB minimum. */
   unsigned stack_size_log2 = util_logbase2_ceil(pipelineStackSize);
   if (stack_size_log2 < 10)
      stack_size_log2 = 10;

   /* Nothing to do if the current scratch layout already matches. */
   if (rt->scratch.layout.total_size == 1 << stack_size_log2)
      return;

   brw_rt_compute_scratch_layout(&rt->scratch.layout, device->info,
                                 stack_ids_per_dss, 1 << stack_size_log2);

   unsigned bucket = stack_size_log2 - 10;
   assert(bucket < ARRAY_SIZE(device->rt_scratch_bos));

   struct anv_bo *bo = p_atomic_read(&device->rt_scratch_bos[bucket]);
   if (bo == NULL) {
      struct anv_bo *new_bo;
      VkResult result = anv_device_alloc_bo(device, "RT scratch",
                                            rt->scratch.layout.total_size,
                                            0, /* alloc_flags */
                                            0, /* explicit_address */
                                            &new_bo);
      if (result != VK_SUCCESS) {
         rt->scratch.layout.total_size = 0;
         anv_batch_set_error(&cmd_buffer->batch, result);
         return;
      }

      /* If another thread installed a BO for this bucket first, use theirs
       * and release the one we just allocated.
       */
      bo = p_atomic_cmpxchg(&device->rt_scratch_bos[bucket], NULL, new_bo);
      if (bo != NULL) {
         anv_device_release_bo(device, new_bo);
      } else {
         bo = new_bo;
      }
   }

   rt->scratch.bo = bo;
}

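/* Capture the subset of compute pipeline state selected by flags so it can
 * later be reinstated with anv_cmd_buffer_restore_state().
 */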
void
anv_cmd_buffer_save_state(struct anv_cmd_buffer *cmd_buffer,
                          uint32_t flags,
                          struct anv_cmd_saved_state *state)
{
   state->flags = flags;

   /* we only support the compute pipeline at the moment */
   assert(state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE);
   const struct anv_cmd_pipeline_state *pipe_state =
      &cmd_buffer->state.compute.base;

   if (state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE)
      state->pipeline = pipe_state->pipeline;

   if (state->flags & ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0)
      state->descriptor_set = pipe_state->descriptors[0];

   if (state->flags & ANV_CMD_SAVED_STATE_PUSH_CONSTANTS) {
      memcpy(state->push_constants, pipe_state->push_constants.client_data,
             sizeof(state->push_constants));
   }
}

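/* Counterpart to anv_cmd_buffer_save_state(): rebind the saved compute
 * pipeline and descriptor set through the public entrypoints and re-emit
 * the saved push constants.
 */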
void
anv_cmd_buffer_restore_state(struct anv_cmd_buffer *cmd_buffer,
                             struct anv_cmd_saved_state *state)
{
   VkCommandBuffer cmd_buffer_ = anv_cmd_buffer_to_handle(cmd_buffer);

   assert(state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE);
   const VkPipelineBindPoint bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
   const VkShaderStageFlags stage_flags = VK_SHADER_STAGE_COMPUTE_BIT;
   struct anv_cmd_pipeline_state *pipe_state = &cmd_buffer->state.compute.base;

   if (state->flags & ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE) {
      if (state->pipeline) {
         anv_CmdBindPipeline(cmd_buffer_, bind_point,
                             anv_pipeline_to_handle(state->pipeline));
      } else {
         pipe_state->pipeline = NULL;
      }
   }

   if (state->flags & ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0) {
      if (state->descriptor_set) {
         anv_cmd_buffer_bind_descriptor_set(cmd_buffer, bind_point, NULL, 0,
                                            state->descriptor_set, NULL, NULL);
      } else {
         pipe_state->descriptors[0] = NULL;
      }
   }

   if (state->flags & ANV_CMD_SAVED_STATE_PUSH_CONSTANTS) {
      /* anv_CmdPushConstants2KHR() does not look at the layout, so
       * VK_NULL_HANDLE is fine here.
       */
      VkPushConstantsInfoKHR push_info = {
         .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
         .layout = VK_NULL_HANDLE,
         .stageFlags = stage_flags,
         .offset = 0,
         .size = sizeof(state->push_constants),
         .pValues = state->push_constants,
      };
      anv_CmdPushConstants2KHR(cmd_buffer_, &push_info);
   }
}