1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "anv_nir.h"
25 #include "nir/nir_builder.h"
26 #include "compiler/brw_nir.h"
27 #include "util/mesa-sha1.h"
28 #include "util/set.h"
29 
30 #include "vk_enum_to_str.h"
31 
32 #include "genxml/genX_bits.h"
33 
34 /* Sampler tables don't actually have a maximum size but we pick one just so
35  * that we don't end up emitting too much state on-the-fly.
36  */
37 #define MAX_SAMPLER_TABLE_SIZE 128
38 #define BINDLESS_OFFSET        255
39 
40 enum binding_property {
41    BINDING_PROPERTY_NORMAL            = BITFIELD_BIT(0),
42    BINDING_PROPERTY_PUSHABLE          = BITFIELD_BIT(1),
43    BINDING_PROPERTY_EMBEDDED_SAMPLER  = BITFIELD_BIT(2),
44    BINDING_PROPERTY_NO_BINDING_TABLE  = BITFIELD_BIT(3),
45 };
46 
47 struct apply_pipeline_layout_state {
48    const struct anv_physical_device *pdevice;
49 
50    const struct anv_pipeline_sets_layout *layout;
51    nir_address_format desc_addr_format;
52    nir_address_format ssbo_addr_format;
53    nir_address_format ubo_addr_format;
54 
55    /* Place to flag lowered instructions so we don't lower them twice */
56    struct set *lowered_instrs;
57 
58    bool uses_constants;
59    bool has_dynamic_buffers;
60    bool has_independent_sets;
61    uint8_t constants_offset;
62    struct {
63       bool desc_buffer_used;
64       uint8_t desc_offset;
65 
66       struct anv_binding_apply_layout {
67          uint8_t use_count;
68 
69          /* Binding table offset */
70          uint8_t surface_offset;
71 
72          /* Sampler table offset */
73          uint8_t sampler_offset;
74 
75          /* Embedded sampler index */
76          uint16_t embedded_sampler_index;
77 
78          /* Properties of the binding */
79          enum binding_property properties;
80 
81          /* Each binding is identified with a unique identifier for push
82           * computation.
83           */
84          uint32_t push_block;
85       } *binding;
86    } set[MAX_SETS];
87 };
88 
89 /* For a given binding, tells us how many binding table entries are needed per
90  * element.
91  */
92 static uint32_t
93 bti_multiplier(const struct apply_pipeline_layout_state *state,
94                uint32_t set, uint32_t binding)
95 {
96    const struct anv_descriptor_set_layout *set_layout =
97       state->layout->set[set].layout;
98    const struct anv_descriptor_set_binding_layout *bind_layout =
99       &set_layout->binding[binding];
100 
101    return bind_layout->max_plane_count;
102 }
103 
104 static nir_address_format
105 addr_format_for_desc_type(VkDescriptorType desc_type,
106                           struct apply_pipeline_layout_state *state)
107 {
108    switch (desc_type) {
109    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
110    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
111       return state->ssbo_addr_format;
112 
113    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
114    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
115       return state->ubo_addr_format;
116 
117    case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
118       return state->desc_addr_format;
119 
120    default:
121       unreachable("Unsupported descriptor type");
122    }
123 }
124 
125 static struct anv_binding_apply_layout *
126 add_binding(struct apply_pipeline_layout_state *state,
127             uint32_t set, uint32_t binding)
128 {
129    const struct anv_descriptor_set_layout *set_layout =
130       state->layout->set[set].layout;
131    const struct anv_descriptor_set_binding_layout *bind_layout =
132       &set_layout->binding[binding];
133 
134    assert(set < state->layout->num_sets);
135    assert(binding < state->layout->set[set].layout->binding_count);
136 
137    if (state->set[set].binding[binding].use_count < UINT8_MAX)
138       state->set[set].binding[binding].use_count++;
139 
140    /* Only flag the descriptor buffer as used if there's actually data for
141     * this binding.  This lets us be lazy and call this function constantly
142     * without worrying about unnecessarily enabling the buffer.
143     */
144    if (bind_layout->descriptor_surface_stride)
145       state->set[set].desc_buffer_used = true;
146 
147    if (bind_layout->dynamic_offset_index >= 0)
148       state->has_dynamic_buffers = true;
149 
150    state->set[set].binding[binding].properties |= BINDING_PROPERTY_NORMAL;
151 
152    if (set_layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT)
153       state->set[set].binding[binding].properties |= BINDING_PROPERTY_EMBEDDED_SAMPLER;
154 
155    return &state->set[set].binding[binding];
156 }
157 
158 const VkDescriptorSetLayoutCreateFlags non_pushable_set_flags =
159    VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT |
160    VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT;
161 
162 const VkDescriptorBindingFlags non_pushable_binding_flags =
163    VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT |
164    VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT |
165    VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT;
166 
167 static void
168 add_binding_type(struct apply_pipeline_layout_state *state,
169                  uint32_t set, uint32_t binding, VkDescriptorType type)
170 {
171    add_binding(state, set, binding);
172 
173    const struct anv_descriptor_set_layout *set_layout =
174       state->layout->set[set].layout;
175    const struct anv_descriptor_set_binding_layout *bind_layout =
176       &set_layout->binding[binding];
177 
178    /* We can't push descriptor buffers, but we can push descriptors */
179    const bool is_set_pushable =
180       (set_layout->flags & non_pushable_set_flags) == 0 ||
181       set_layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR;
182    const bool is_binding_pushable =
183       (bind_layout->flags & non_pushable_binding_flags) == 0;
184 
185    if (is_set_pushable && is_binding_pushable &&
186        (state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
187         state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
188         state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ||
189         state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) &&
190        (type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
191         type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK))
192       state->set[set].binding[binding].properties |= BINDING_PROPERTY_PUSHABLE;
193 }
194 
195 static struct anv_binding_apply_layout *
196 add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src)
197 {
198    nir_deref_instr *deref = nir_src_as_deref(src);
199    nir_variable *var = nir_deref_instr_get_variable(deref);
200    return add_binding(state, var->data.descriptor_set, var->data.binding);
201 }
202 
203 static void
204 add_tex_src_binding(struct apply_pipeline_layout_state *state,
205                     nir_tex_instr *tex, nir_tex_src_type deref_src_type)
206 {
207    int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
208    if (deref_src_idx < 0)
209       return;
210 
211    struct anv_binding_apply_layout *layout =
212       add_deref_src_binding(state, tex->src[deref_src_idx].src);
213 
214    /* This is likely a fallout of Wa_14020375314 but hasn't been fully
215     * understood by HW people yet.
216     *
217     * In HSD-18037984222 we reported that the render target index given
218     * through a descriptor in the address register is broken. I think the same
219     * issue is happening here when we use a descriptor given by the address
220     * register for the sampler and when the
221     * RENDER_SURFACE_STATE::EnableSamplerRoutetoLSC bit is enabled. This seems
222     * to affect only texelFetch() operations.
223     *
224     * We probably don't want to lose the performance benefit of the route to
225     * LSC, so instead we disable dynamic descriptors by checking if a binding
226     * array is accessed with a non-constant value.
227     *
228     * Fixes a bunch of tests in dEQP-VK.binding_model.*.index_push_constant.*
229     */
230    if (state->pdevice->info.ver >= 20 && tex->op == nir_texop_txf) {
231       nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
232       if (deref->deref_type != nir_deref_type_var) {
233          assert(deref->deref_type == nir_deref_type_array);
234          if (!nir_src_is_const(deref->arr.index))
235             layout->properties |= BINDING_PROPERTY_NO_BINDING_TABLE;
236       }
237    }
238 }
239 
240 static bool
241 get_used_bindings(UNUSED nir_builder *_b, nir_instr *instr, void *_state)
242 {
243    struct apply_pipeline_layout_state *state = _state;
244 
245    switch (instr->type) {
246    case nir_instr_type_intrinsic: {
247       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
248       switch (intrin->intrinsic) {
249       case nir_intrinsic_vulkan_resource_index:
250          add_binding_type(state,
251                           nir_intrinsic_desc_set(intrin),
252                           nir_intrinsic_binding(intrin),
253                           nir_intrinsic_desc_type(intrin));
254          break;
255 
256       case nir_intrinsic_image_deref_load:
257       case nir_intrinsic_image_deref_store:
258       case nir_intrinsic_image_deref_atomic:
259       case nir_intrinsic_image_deref_atomic_swap:
260       case nir_intrinsic_image_deref_size:
261       case nir_intrinsic_image_deref_samples:
262       case nir_intrinsic_image_deref_load_param_intel:
263       case nir_intrinsic_image_deref_load_raw_intel:
264       case nir_intrinsic_image_deref_store_raw_intel:
265       case nir_intrinsic_image_deref_sparse_load:
266          add_deref_src_binding(state, intrin->src[0]);
267          break;
268 
269       case nir_intrinsic_load_constant:
270          state->uses_constants = true;
271          break;
272 
273       default:
274          break;
275       }
276       break;
277    }
278    case nir_instr_type_tex: {
279       nir_tex_instr *tex = nir_instr_as_tex(instr);
280       add_tex_src_binding(state, tex, nir_tex_src_texture_deref);
281       add_tex_src_binding(state, tex, nir_tex_src_sampler_deref);
282       break;
283    }
284    default:
285       break;
286    }
287 
288    return false;
289 }
290 
291 static nir_intrinsic_instr *
292 find_descriptor_for_index_src(nir_src src,
293                               struct apply_pipeline_layout_state *state)
294 {
295    nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);
296 
297    while (intrin && intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex)
298       intrin = nir_src_as_intrinsic(intrin->src[0]);
299 
300    if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
301       return NULL;
302 
303    return intrin;
304 }
305 
306 static bool
307 descriptor_has_bti(nir_intrinsic_instr *intrin,
308                    struct apply_pipeline_layout_state *state)
309 {
310    assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
311 
312    uint32_t set = nir_intrinsic_desc_set(intrin);
313    uint32_t binding = nir_intrinsic_binding(intrin);
314    const struct anv_descriptor_set_binding_layout *bind_layout =
315       &state->layout->set[set].layout->binding[binding];
316 
317    if (state->set[set].binding[binding].properties & BINDING_PROPERTY_EMBEDDED_SAMPLER)
318       return false;
319 
320    uint32_t surface_index;
321    if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM)
322       surface_index = state->set[set].desc_offset;
323    else
324       surface_index = state->set[set].binding[binding].surface_offset;
325 
326    /* Only lower to a BTI message if we have a valid binding table index. */
327    return surface_index < MAX_BINDING_TABLE_SIZE;
328 }
329 
330 static nir_address_format
331 descriptor_address_format(nir_intrinsic_instr *intrin,
332                           struct apply_pipeline_layout_state *state)
333 {
334    assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
335 
336    return addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);
337 }
338 
339 static nir_intrinsic_instr *
340 nir_deref_find_descriptor(nir_deref_instr *deref,
341                           struct apply_pipeline_layout_state *state)
342 {
343    while (1) {
344       /* Nothing we will use this on has a variable */
345       assert(deref->deref_type != nir_deref_type_var);
346 
347       nir_deref_instr *parent = nir_src_as_deref(deref->parent);
348       if (!parent)
349          break;
350 
351       deref = parent;
352    }
353    assert(deref->deref_type == nir_deref_type_cast);
354 
355    nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent);
356    if (!intrin || intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
357       return NULL;
358 
359    return find_descriptor_for_index_src(intrin->src[0], state);
360 }
361 
362 static nir_def *
363 build_load_descriptor_mem(nir_builder *b,
364                           nir_def *desc_addr, unsigned desc_offset,
365                           unsigned num_components, unsigned bit_size,
366                           const struct apply_pipeline_layout_state *state)
367 
368 {
369    switch (state->desc_addr_format) {
370    case nir_address_format_64bit_global_32bit_offset: {
371       nir_def *base_addr =
372          nir_pack_64_2x32(b, nir_trim_vector(b, desc_addr, 2));
373       nir_def *offset32 =
374          nir_iadd_imm(b, nir_channel(b, desc_addr, 3), desc_offset);
375 
376       return nir_load_global_constant_offset(b, num_components, bit_size,
377                                              base_addr, offset32,
378                                              .align_mul = 8,
379                                              .align_offset = desc_offset % 8);
380    }
381 
382    case nir_address_format_32bit_index_offset: {
383       nir_def *surface_index = nir_channel(b, desc_addr, 0);
384       nir_def *offset32 =
385          nir_iadd_imm(b, nir_channel(b, desc_addr, 1), desc_offset);
386 
387       return nir_load_ubo(b, num_components, bit_size,
388                           surface_index, offset32,
389                           .align_mul = 8,
390                           .align_offset = desc_offset % 8,
391                           .range_base = 0,
392                           .range = num_components * bit_size / 8);
393    }
394 
395    default:
396       unreachable("Unsupported address format");
397    }
398 }
399 
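/* Illustrative sketch (not driver logic, names of the loaded fields are an
 * assumption based on the indirect-descriptor path below): a typical use of
 * build_load_descriptor_mem() from a lowering callback, pulling the 64-bit
 * base address stored in the first two dwords of an
 * anv_address_range_descriptor.  desc_addr would come from
 * build_desc_addr_for_binding() further down.
 */
#if 0
   nir_def *addr_dws = build_load_descriptor_mem(b, desc_addr, 0, 2, 32, state);
   nir_def *base_addr = nir_pack_64_2x32(b, addr_dws);
#endif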
400 /* When using direct descriptors, we do not have a structure to read in memory
401  * like anv_address_range_descriptor where all the fields perfectly match the
402  * vec4 address format we need to generate for A64 messages. Instead we need
403  * to build the vec4 by parsing the RENDER_SURFACE_STATE structure. Easy
404  * enough for the surface address, a lot less fun for the size, where you have
405  * to combine 3 fields scattered over multiple dwords, add one to the total,
406  * and check the surface type to deal with null descriptors.
407  *
408  * Fortunately we can reuse the Auxiliary Surface Address field to stash our
409  * buffer size and just load a vec4.
410  */
411 static nir_def *
412 build_optimized_load_render_surface_state_address(nir_builder *b,
413                                                   nir_def *desc_addr,
414                                                   struct apply_pipeline_layout_state *state)
415 
416 {
417    const struct intel_device_info *devinfo = &state->pdevice->info;
418 
419    nir_def *surface_addr =
420       build_load_descriptor_mem(b, desc_addr,
421                                 RENDER_SURFACE_STATE_SurfaceBaseAddress_start(devinfo) / 8,
422                                 4, 32, state);
423    nir_def *addr_ldw = nir_channel(b, surface_addr, 0);
424    nir_def *addr_udw = nir_channel(b, surface_addr, 1);
425    nir_def *length = nir_channel(b, surface_addr, 3);
426 
427    return nir_vec4(b, addr_ldw, addr_udw, length, nir_imm_int(b, 0));
428 }
429 
430 /* When using direct descriptors, we do not have a structure to read in memory
431  * like anv_address_range_descriptor where all the fields perfectly match the
432  * vec4 address format we need to generate for A64 messages. Instead we need
433  * to build the vec4 by parsing the RENDER_SURFACE_STATE structure. Easy
434  * enough for the surface address, a lot less fun for the size.
435  */
436 static nir_def *
437 build_non_optimized_load_render_surface_state_address(nir_builder *b,
438                                                       nir_def *desc_addr,
439                                                       struct apply_pipeline_layout_state *state)
440 
441 {
442    const struct intel_device_info *devinfo = &state->pdevice->info;
443 
444    assert(((RENDER_SURFACE_STATE_SurfaceBaseAddress_start(devinfo) +
445             RENDER_SURFACE_STATE_SurfaceBaseAddress_bits(devinfo) - 1) -
446            RENDER_SURFACE_STATE_Width_start(devinfo)) / 8 <= 32);
447 
448    nir_def *surface_addr =
449       build_load_descriptor_mem(b, desc_addr,
450                                 RENDER_SURFACE_STATE_SurfaceBaseAddress_start(devinfo) / 8,
451                                 DIV_ROUND_UP(RENDER_SURFACE_STATE_SurfaceBaseAddress_bits(devinfo), 32),
452                                 32, state);
453    nir_def *addr_ldw = nir_channel(b, surface_addr, 0);
454    nir_def *addr_udw = nir_channel(b, surface_addr, 1);
455 
456    /* Take all the RENDER_SURFACE_STATE fields from the beginning of the
457     * structure up to the Depth field.
458     */
459    const uint32_t type_sizes_dwords =
460       DIV_ROUND_UP(RENDER_SURFACE_STATE_Depth_start(devinfo) +
461                    RENDER_SURFACE_STATE_Depth_bits(devinfo), 32);
462    nir_def *type_sizes =
463       build_load_descriptor_mem(b, desc_addr, 0, type_sizes_dwords, 32, state);
464 
465    const unsigned width_start = RENDER_SURFACE_STATE_Width_start(devinfo);
466    /* SKL PRMs, Volume 2d: Command Reference: Structures, RENDER_SURFACE_STATE
467     *
468     *    Width:  "bits [6:0]   of the number of entries in the buffer - 1"
469     *    Height: "bits [20:7]  of the number of entries in the buffer - 1"
470     *    Depth:  "bits [31:21] of the number of entries in the buffer - 1"
471     */
472    const unsigned width_bits = 7;
473    nir_def *width =
474       nir_iand_imm(b,
475                    nir_ishr_imm(b,
476                                 nir_channel(b, type_sizes, width_start / 32),
477                                 width_start % 32),
478                    (1u << width_bits) - 1);
479 
480    const unsigned height_start = RENDER_SURFACE_STATE_Height_start(devinfo);
481    const unsigned height_bits = RENDER_SURFACE_STATE_Height_bits(devinfo);
482    nir_def *height =
483       nir_iand_imm(b,
484                    nir_ishr_imm(b,
485                                 nir_channel(b, type_sizes, height_start / 32),
486                                 height_start % 32),
487                    (1u << height_bits) - 1);
488 
489    const unsigned depth_start = RENDER_SURFACE_STATE_Depth_start(devinfo);
490    const unsigned depth_bits = RENDER_SURFACE_STATE_Depth_bits(devinfo);
491    nir_def *depth =
492       nir_iand_imm(b,
493                    nir_ishr_imm(b,
494                                 nir_channel(b, type_sizes, depth_start / 32),
495                                 depth_start % 32),
496                    (1u << depth_bits) - 1);
497 
498    nir_def *length = width;
499    length = nir_ior(b, length, nir_ishl_imm(b, height, width_bits));
500    length = nir_ior(b, length, nir_ishl_imm(b, depth, width_bits + height_bits));
501    length = nir_iadd_imm(b, length, 1);
502 
503    /* Check the surface type, if it's SURFTYPE_NULL, set the length of the
504     * buffer to 0.
505     */
506    const unsigned type_start = RENDER_SURFACE_STATE_SurfaceType_start(devinfo);
507    const unsigned type_dw = type_start / 32;
508    nir_def *type =
509       nir_iand_imm(b,
510                    nir_ishr_imm(b,
511                                 nir_channel(b, type_sizes, type_dw),
512                                 type_start % 32),
513                    (1u << RENDER_SURFACE_STATE_SurfaceType_bits(devinfo)) - 1);
514 
515    length = nir_bcsel(b,
516                       nir_ieq_imm(b, type, 7 /* SURFTYPE_NULL */),
517                       nir_imm_int(b, 0), length);
518 
519    return nir_vec4(b, addr_ldw, addr_udw, length, nir_imm_int(b, 0));
520 }
521 
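/* A minimal sketch of the encoding the function above undoes, in plain host C
 * purely for illustration (the values are hypothetical): for a buffer surface
 * with N entries, N - 1 is split across Width [6:0], Height [20:7] and
 * Depth [31:21], and the shader code reassembles the three fields and adds 1.
 */
#if 0
   uint32_t num_entries = 1000;                 /* hypothetical buffer size */
   uint32_t encoded = num_entries - 1;
   uint32_t width   = encoded & 0x7f;           /* bits [6:0]   */
   uint32_t height  = (encoded >> 7) & 0x3fff;  /* bits [20:7]  */
   uint32_t depth   = (encoded >> 21) & 0x7ff;  /* bits [31:21] */
   uint32_t length  = width | (height << 7) | (depth << 21);
   assert(length + 1 == num_entries);
#endif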
522 static inline nir_def *
523 build_load_render_surface_state_address(nir_builder *b,
524                                         nir_def *desc_addr,
525                                         struct apply_pipeline_layout_state *state)
526 {
527    if (state->pdevice->isl_dev.buffer_length_in_aux_addr)
528       return build_optimized_load_render_surface_state_address(b, desc_addr, state);
529    /* Wa_14019708328 */
530    return build_non_optimized_load_render_surface_state_address(b, desc_addr, state);
531 }
532 
533 /* Load the depth of a 3D storage image.
534  *
535  * Either by reading the indirect descriptor value, or reading the value from
536  * RENDER_SURFACE_STATE.
537  *
538  * This is necessary for VK_EXT_image_sliced_view_of_3d.
539  */
540 static nir_def *
541 build_load_storage_3d_image_depth(nir_builder *b,
542                                   nir_def *desc_addr,
543                                   nir_def *resinfo_depth,
544                                   struct apply_pipeline_layout_state *state)
545 
546 {
547    const struct intel_device_info *devinfo = &state->pdevice->info;
548 
549    if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) {
550       return build_load_descriptor_mem(
551          b, desc_addr,
552          offsetof(struct anv_storage_image_descriptor, image_depth),
553          1, 32, state);
554    } else {
555       nir_def *data = build_load_descriptor_mem(
556          b, desc_addr,
557          RENDER_SURFACE_STATE_RenderTargetViewExtent_start(devinfo) / 8,
558          1, 32, state);
559       nir_def *depth =
560          nir_ushr_imm(
561             b, data,
562             RENDER_SURFACE_STATE_RenderTargetViewExtent_start(devinfo) % 32);
563       depth = nir_iand_imm(
564          b, depth,
565          (1u << RENDER_SURFACE_STATE_RenderTargetViewExtent_bits(devinfo)) - 1);
566       depth = nir_iadd_imm(b, depth, 1);
567 
568       /* Return the minimum between the RESINFO value and the
569        * RENDER_SURFACE_STATE::RenderTargetViewExtent value.
570        *
571        * Both are expressed for the current view LOD, but in the case of a
572        * SURFTYPE_NULL, RESINFO will return the right value, while the -1
573        * value in RENDER_SURFACE_STATE should be ignored.
574        */
575       return nir_umin(b, resinfo_depth, depth);
576    }
577 }
578 
579 static nir_def *
580 build_load_desc_set_dynamic_index(nir_builder *b, unsigned set_idx)
581 {
582    return nir_iand_imm(
583       b,
584       anv_load_driver_uniform(b, 1, desc_surface_offsets[set_idx]),
585       ANV_DESCRIPTOR_SET_DYNAMIC_INDEX_MASK);
586 }
587 
588 static nir_def *
589 build_load_desc_address(nir_builder *b, nir_def *set_idx, unsigned set_idx_imm,
590                         const struct apply_pipeline_layout_state *state)
591 {
592    nir_def *desc_offset = set_idx != NULL ?
593       anv_load_driver_uniform_indexed(b, 1, desc_surface_offsets, set_idx) :
594       anv_load_driver_uniform(b, 1, desc_surface_offsets[set_idx_imm]);
595    desc_offset = nir_iand_imm(b, desc_offset, ANV_DESCRIPTOR_SET_OFFSET_MASK);
596    if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER &&
597        !state->pdevice->uses_ex_bso) {
598       nir_def *bindless_base_offset =
599          anv_load_driver_uniform(b, 1, surfaces_base_offset);
600       desc_offset = nir_iadd(b, bindless_base_offset, desc_offset);
601    }
602    return nir_pack_64_2x32_split(
603       b, desc_offset,
604       nir_load_reloc_const_intel(
605          b,
606          state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER ?
607          BRW_SHADER_RELOC_DESCRIPTORS_BUFFER_ADDR_HIGH :
608          BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH));
609 }
610 
611 /** Build a Vulkan resource index
612  *
613  * A "resource index" is the term used by our SPIR-V parser and the relevant
614  * NIR intrinsics for a reference into a descriptor set.  It acts much like a
615  * deref in NIR except that it accesses opaque descriptors instead of memory.
616  *
617  * Coming out of SPIR-V, both the resource indices (in the form of
618  * vulkan_resource_[re]index intrinsics) and the memory derefs (in the form
619  * of nir_deref_instr) use the same vector component/bit size.  The meaning
620  * of those values for memory derefs (nir_deref_instr) is given by the
621  * nir_address_format associated with the descriptor type.  For resource
622  * indices, it's an encoding entirely internal to ANV which describes, in some
623  * sense, the address of the descriptor.  Thanks to the NIR/SPIR-V rules, it
624  * must be packed into the same size SSA values as a memory address.  For this
625  * reason, the actual encoding may depend both on the address format for
626  * memory derefs and the descriptor address format.
627  *
628  * The load_vulkan_descriptor intrinsic exists to provide a transition point
629  * between these two forms of derefs: descriptor and memory.
630  */
631 static nir_def *
632 build_res_index(nir_builder *b,
633                 uint32_t set, uint32_t binding,
634                 nir_def *array_index,
635                 struct apply_pipeline_layout_state *state)
636 {
637    const struct anv_descriptor_set_binding_layout *bind_layout =
638       &state->layout->set[set].layout->binding[binding];
639 
640    uint32_t array_size = bind_layout->array_size;
641 
642    uint32_t set_idx;
643    switch (state->desc_addr_format) {
644    case nir_address_format_64bit_global_32bit_offset:
645       /* Descriptor set buffer accesses will go through A64 messages, so the
646        * index to get the descriptor set buffer address is located in the
647        * anv_push_constants::desc_surface_offsets and it's indexed by the set
648        * number.
649        */
650       set_idx = set;
651       break;
652 
653    case nir_address_format_32bit_index_offset:
654       /* Descriptor set buffer accesses will go through the binding table. The
655        * offset is the entry in the binding table.
656        */
657       assert(state->set[set].desc_offset < MAX_BINDING_TABLE_SIZE);
658       set_idx = state->set[set].desc_offset;
659       break;
660 
661    default:
662       unreachable("Unsupported address format");
663    }
664 
665    assert(bind_layout->dynamic_offset_index < MAX_DYNAMIC_BUFFERS);
666    nir_def *dynamic_offset_index;
667    if (bind_layout->dynamic_offset_index >= 0) {
668       if (state->has_independent_sets) {
669          nir_def *dynamic_offset_start =
670             build_load_desc_set_dynamic_index(b, set);
671          dynamic_offset_index =
672             nir_iadd_imm(b, dynamic_offset_start,
673                          bind_layout->dynamic_offset_index);
674       } else {
675          dynamic_offset_index =
676             nir_imm_int(b,
677                         state->layout->set[set].dynamic_offset_start +
678                         bind_layout->dynamic_offset_index);
679       }
680    } else {
681       dynamic_offset_index = nir_imm_int(b, 0xff); /* No dynamic offset */
682    }
683 
684    const uint32_t desc_bti = state->set[set].binding[binding].surface_offset;
685    /* We don't care about the stride field for inline uniforms (see
686     * build_desc_addr_for_res_index), but for anything else the stride should
687     * be aligned to 8 bytes because we store it divided by 8 in the packed
688     * info, which lets us encode strides up to 2040 (8 * 255).
689     */
690    assert(bind_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ||
691           bind_layout->descriptor_surface_stride % 8 == 0);
692    const uint32_t desc_stride =
693       bind_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ? 0 :
694       bind_layout->descriptor_surface_stride / 8;
695 
696    nir_def *packed =
697       nir_ior_imm(b,
698                   dynamic_offset_index,
699                   (desc_stride << 24) |
700                   (desc_bti << 16)    |
701                   (set_idx << 8));
702 
703 
704    return nir_vec4(b, packed,
705                       nir_imm_int(b, bind_layout->descriptor_surface_offset),
706                       nir_imm_int(b, array_size - 1),
707                       array_index);
708 }
709 
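/* A minimal sketch of the dword-0 layout produced by build_res_index() above,
 * written as a hypothetical host-side decode for illustration only;
 * unpack_res_index() below does the same thing in NIR.
 */
#if 0
static inline void
example_unpack_res_index_dw0(uint32_t packed,
                             uint32_t *desc_stride,
                             uint32_t *bti_idx,
                             uint32_t *set_idx,
                             uint32_t *dyn_offset_base)
{
   *desc_stride     = ((packed >> 24) & 0xff) * 8; /* stored as stride / 8 */
   *bti_idx         = (packed >> 16) & 0xff;
   *set_idx         = (packed >> 8) & 0xff;
   *dyn_offset_base = packed & 0xff;               /* 0xff = no dynamic offset */
}
#endif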
710 struct res_index_defs {
711    nir_def *bti_idx;
712    nir_def *set_idx;
713    nir_def *dyn_offset_base;
714    nir_def *desc_offset_base;
715    nir_def *array_index;
716    nir_def *desc_stride;
717 };
718 
719 static struct res_index_defs
720 unpack_res_index(nir_builder *b, nir_def *index)
721 {
722    struct res_index_defs defs;
723 
724    nir_def *packed = nir_channel(b, index, 0);
725    defs.desc_stride =
726       nir_imul_imm(b, nir_extract_u8(b, packed, nir_imm_int(b, 3)), 8);
727    defs.bti_idx = nir_extract_u8(b, packed, nir_imm_int(b, 2));
728    defs.set_idx = nir_extract_u8(b, packed, nir_imm_int(b, 1));
729    defs.dyn_offset_base = nir_extract_u8(b, packed, nir_imm_int(b, 0));
730 
731    defs.desc_offset_base = nir_channel(b, index, 1);
732    defs.array_index = nir_channel(b, index, 3);
733 
734    return defs;
735 }
736 
737 /** Whether a surface is accessed through the bindless surface state heap */
738 static bool
739 is_binding_bindless(unsigned set, unsigned binding, bool sampler,
740                     const struct apply_pipeline_layout_state *state)
741 {
742    /* Has a binding table entry been allocated for this binding? */
743    if (sampler &&
744        state->set[set].binding[binding].sampler_offset != BINDLESS_OFFSET)
745       return false;
746    if (!sampler &&
747        state->set[set].binding[binding].surface_offset != BINDLESS_OFFSET)
748       return false;
749 
750    return true;
751 }
752 
753 /** Adjust a Vulkan resource index
754  *
755  * This is the equivalent of nir_deref_type_ptr_as_array for resource indices.
756  * For array descriptors, it allows us to adjust the array index.  Thanks to
757  * variable pointers, we cannot always fold this re-index operation into the
758  * vulkan_resource_index intrinsic and we have to do it based on nothing but
759  * the address format.
760  */
761 static nir_def *
762 build_res_reindex(nir_builder *b, nir_def *orig, nir_def *delta)
763 {
764    return nir_vec4(b, nir_channel(b, orig, 0),
765                       nir_channel(b, orig, 1),
766                       nir_channel(b, orig, 2),
767                       nir_iadd(b, nir_channel(b, orig, 3), delta));
768 }
769 
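/* Example of where a reindex comes from, assuming a hypothetical shader:
 *
 *    layout(set = 0, binding = 2) buffer Blk { ... } blocks[8];
 *    ... blocks[base + i] ...
 *
 * The SPIR-V front-end emits a vulkan_resource_index for (set 0, binding 2,
 * base) followed by a vulkan_resource_reindex adding i.  After lowering that
 * is simply build_res_reindex(b, index, i), i.e. only the array-index channel
 * (.w) of the vec4 changes.
 */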
770 /** Get the address for a descriptor given its resource index
771  *
772  * Because of the re-indexing operations, we can't bounds check descriptor
773  * array access until we have the final index.  That means we end up doing the
774  * bounds check here, if needed.  See unpack_res_index() for more details.
775  *
776  * This function takes a desc_type which is used to decide whether to apply
777  * the descriptor stride for array descriptors (inline uniform blocks have
778  * no stride).
779  */
780 static nir_def *
781 build_desc_addr_for_res_index(nir_builder *b,
782                               const VkDescriptorType desc_type,
783                               nir_def *index, nir_address_format addr_format,
784                               struct apply_pipeline_layout_state *state)
785 {
786    struct res_index_defs res = unpack_res_index(b, index);
787 
788    nir_def *desc_offset = res.desc_offset_base;
789    if (desc_type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
790       /* Compute the actual descriptor offset.  For inline uniform blocks,
791        * the array index is ignored as they are only allowed to be a single
792        * descriptor (not an array) and there is no concept of a "stride".
793        *
794        */
795       desc_offset =
796          nir_iadd(b, desc_offset, nir_imul(b, res.array_index, res.desc_stride));
797    }
798 
799    switch (addr_format) {
800    case nir_address_format_64bit_global_32bit_offset:
801    case nir_address_format_64bit_bounded_global: {
802       switch (state->desc_addr_format) {
803       case nir_address_format_64bit_global_32bit_offset: {
804          nir_def *base_addr =
805             build_load_desc_address(b, res.set_idx, 0, state);
806          return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
807                             nir_unpack_64_2x32_split_y(b, base_addr),
808                             nir_imm_int(b, UINT32_MAX),
809                             desc_offset);
810       }
811 
812       case nir_address_format_32bit_index_offset:
813          return nir_vec2(b, res.set_idx, desc_offset);
814 
815       default:
816          unreachable("Unhandled address format");
817       }
818    }
819 
820    case nir_address_format_32bit_index_offset:
821       assert(desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK);
822       assert(state->desc_addr_format == nir_address_format_32bit_index_offset);
823       return nir_vec2(b, res.set_idx, desc_offset);
824 
825    default:
826       unreachable("Unhandled address format");
827    }
828 }
829 
830 static nir_def *
831 build_desc_addr_for_binding(nir_builder *b,
832                             unsigned set, unsigned binding,
833                             nir_def *array_index, unsigned plane,
834                             const struct apply_pipeline_layout_state *state)
835 {
836    const struct anv_descriptor_set_binding_layout *bind_layout =
837       &state->layout->set[set].layout->binding[binding];
838 
839    switch (state->desc_addr_format) {
840    case nir_address_format_64bit_global_32bit_offset:
841    case nir_address_format_64bit_bounded_global: {
842       nir_def *set_addr = build_load_desc_address(b, NULL, set, state);
843       nir_def *desc_offset =
844          nir_iadd_imm(b,
845                       nir_imul_imm(b,
846                                    array_index,
847                                    bind_layout->descriptor_surface_stride),
848                       bind_layout->descriptor_surface_offset);
849       if (plane != 0) {
850          desc_offset = nir_iadd_imm(
851             b, desc_offset, plane * bind_layout->descriptor_data_surface_size);
852       }
853 
854       return nir_vec4(b, nir_unpack_64_2x32_split_x(b, set_addr),
855                          nir_unpack_64_2x32_split_y(b, set_addr),
856                          nir_imm_int(b, UINT32_MAX),
857                          desc_offset);
858    }
859 
860    case nir_address_format_32bit_index_offset: {
861       nir_def *desc_offset =
862          nir_iadd_imm(b,
863                       nir_imul_imm(b,
864                                    array_index,
865                                    bind_layout->descriptor_surface_stride),
866                       bind_layout->descriptor_surface_offset);
867       if (plane != 0) {
868          desc_offset = nir_iadd_imm(
869             b, desc_offset, plane * bind_layout->descriptor_data_surface_size);
870       }
871       return nir_vec2(b,
872                       nir_imm_int(b, state->set[set].desc_offset),
873                       desc_offset);
874    }
875 
876    default:
877       unreachable("Unhandled address format");
878    }
879 }
880 
881 static unsigned
882 binding_descriptor_offset(const struct apply_pipeline_layout_state *state,
883                           const struct anv_descriptor_set_binding_layout *bind_layout,
884                           bool sampler)
885 {
886    if (sampler &&
887        state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT)
888       return bind_layout->descriptor_sampler_offset;
889 
890    return bind_layout->descriptor_surface_offset;
891 }
892 
893 static unsigned
894 binding_descriptor_stride(const struct apply_pipeline_layout_state *state,
895                           const struct anv_descriptor_set_binding_layout *bind_layout,
896                           bool sampler)
897 {
898    if (sampler &&
899        state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT)
900       return bind_layout->descriptor_sampler_stride;
901 
902    return bind_layout->descriptor_surface_stride;
903 }
904 
905 static nir_def *
906 build_surface_index_for_binding(nir_builder *b,
907                                 unsigned set, unsigned binding,
908                                 nir_def *array_index,
909                                 unsigned plane,
910                                 bool non_uniform,
911                                 const struct apply_pipeline_layout_state *state)
912 {
913    const struct anv_descriptor_set_binding_layout *bind_layout =
914       &state->layout->set[set].layout->binding[binding];
915    const unsigned descriptor_offset =
916       binding_descriptor_offset(state, bind_layout, false /* sampler */);
917    const unsigned descriptor_stride =
918       binding_descriptor_stride(state, bind_layout, false /* sampler */);
919    const bool is_bindless =
920       is_binding_bindless(set, binding, false /* sampler */, state);
921 
922    nir_def *set_offset, *surface_index;
923    if (is_bindless) {
924       if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) {
925          set_offset = nir_imm_int(b, 0xdeaddead);
926 
927          nir_def *desc_addr =
928             build_desc_addr_for_binding(b, set, binding, array_index,
929                                         plane, state);
930 
931          surface_index =
932             build_load_descriptor_mem(b, desc_addr, 0, 1, 32, state);
933       } else {
934          set_offset = anv_load_driver_uniform(b, 1, desc_surface_offsets[set]);
935 
936          /* With bindless, indexes are offsets in the descriptor buffer */
937          surface_index =
938             nir_iadd_imm(b,
939                          nir_imul_imm(b, array_index, descriptor_stride),
940                          descriptor_offset);
941          if (plane != 0) {
942             assert(plane < bind_layout->max_plane_count);
943             surface_index = nir_iadd_imm(b, surface_index,
944                                          plane * (descriptor_stride /
945                                                   bind_layout->max_plane_count));
946          }
947 
948          assert(descriptor_offset % 64 == 0);
949          assert(descriptor_stride % 64 == 0);
950       }
951    } else {
952       /* Unused */
953       set_offset = nir_imm_int(b, 0xdeaddead);
954 
955       unsigned bti_stride = bti_multiplier(state, set, binding);
956       assert(bti_stride >= 1);
957 
958       /* For Ycbcr descriptors, add the plane offset */
959       unsigned element_index = plane;
960 
961       /* With the binding table, it's an index in the table */
962       surface_index =
963          nir_iadd_imm(b, nir_imul_imm(b, array_index, bti_stride),
964                          state->set[set].binding[binding].surface_offset + element_index);
965       assert(state->set[set].binding[binding].surface_offset < MAX_BINDING_TABLE_SIZE);
966    }
967 
968    return nir_resource_intel(b,
969                              set_offset,
970                              surface_index,
971                              array_index,
972                              nir_imm_int(b, 0) /* bindless_base_offset */,
973                              .desc_set = set,
974                              .binding = binding,
975                              .resource_block_intel = state->set[set].binding[binding].push_block,
976                              .resource_access_intel =
977                                 (is_bindless ? nir_resource_intel_bindless : 0) |
978                                 (non_uniform ? nir_resource_intel_non_uniform : 0) |
979                                 ((state->set[set].binding[binding].properties &
980                                   BINDING_PROPERTY_PUSHABLE) ? nir_resource_intel_pushable : 0));
981 }
982 
983 static nir_def *
984 build_sampler_handle_for_binding(nir_builder *b,
985                                  unsigned set, unsigned binding,
986                                  nir_def *array_index,
987                                  unsigned plane,
988                                  bool non_uniform,
989                                  const struct apply_pipeline_layout_state *state)
990 {
991    const struct anv_descriptor_set_binding_layout *bind_layout =
992       &state->layout->set[set].layout->binding[binding];
993    const unsigned descriptor_offset =
994       binding_descriptor_offset(state, bind_layout, true /* sampler */);
995    const unsigned descriptor_stride =
996       binding_descriptor_stride(state, bind_layout, true /* sampler */);
997    const bool is_embedded =
998       state->set[set].binding[binding].properties & BINDING_PROPERTY_EMBEDDED_SAMPLER;
999    const bool is_bindless =
1000       is_binding_bindless(set, binding, true /* sampler */, state);
1001    nir_def *set_offset, *sampler_index, *sampler_base_offset = nir_imm_int(b, 0);
1002 
1003    if (is_embedded) {
1004       set_offset = nir_imm_int(b, 0xdeaddead);
1005       sampler_index = nir_load_reloc_const_intel(
1006          b, BRW_SHADER_RELOC_EMBEDDED_SAMPLER_HANDLE +
1007          state->set[set].binding[binding].embedded_sampler_index);
1008    } else if (is_bindless) {
1009       if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) {
1010          set_offset = nir_imm_int(b, 0xdeaddead);
1011 
1012          nir_def *desc_addr =
1013             build_desc_addr_for_binding(b, set, binding, array_index,
1014                                         plane, state);
1015 
1016          /* This is an anv_sampled_image_descriptor; the sampler handle is
1017           * always in component 1.
1018           */
1019          nir_def *desc_data =
1020             build_load_descriptor_mem(b, desc_addr, 0, 2, 32, state);
1021 
1022          sampler_index = nir_channel(b, desc_data, 1);
1023       } else {
1024          set_offset = anv_load_driver_uniform(b, 1, desc_sampler_offsets[set]);
1025 
1026          uint32_t base_offset = descriptor_offset;
1027 
1028          /* The SAMPLER_STATE can only be located at a 64 byte offset in the
1029           * combined image/sampler case. Combined image/sampler is not supported
1030           * with mutable descriptor types.
1031           */
1032          if (bind_layout->data & ANV_DESCRIPTOR_SURFACE_SAMPLER)
1033             base_offset += ANV_SURFACE_STATE_SIZE;
1034 
1035          if (plane != 0) {
1036             assert(plane < bind_layout->max_plane_count);
1037             base_offset += plane * (descriptor_stride /
1038                                     bind_layout->max_plane_count);
1039          }
1040 
1041          sampler_index =
1042             nir_iadd_imm(b,
1043                          nir_imul_imm(b, array_index, descriptor_stride),
1044                          base_offset);
1045       }
1046    } else {
1047       /* Unused */
1048       set_offset = nir_imm_int(b, 0xdeaddead);
1049 
1050       sampler_index =
1051          nir_iadd_imm(b, array_index,
1052                       state->set[set].binding[binding].sampler_offset + plane);
1053    }
1054 
1055    nir_resource_data_intel sampler_resource = nir_resource_intel_sampler;
1056    if (is_bindless)
1057       sampler_resource |= nir_resource_intel_bindless;
1058    if (is_embedded)
1059       sampler_resource |= nir_resource_intel_sampler_embedded;
1060    if (non_uniform)
1061       sampler_resource |= nir_resource_intel_non_uniform;
1062 
1063    return nir_resource_intel(b,
1064                              set_offset,
1065                              sampler_index,
1066                              array_index,
1067                              sampler_base_offset,
1068                              .desc_set = set,
1069                              .binding = binding,
1070                              .resource_access_intel = sampler_resource);
1071 }
1072 
1073 static nir_def *
1074 build_buffer_dynamic_offset_for_res_index(nir_builder *b,
1075                                           nir_def *dyn_offset_base,
1076                                           nir_def *array_index,
1077                                           struct apply_pipeline_layout_state *state)
1078 {
1079    nir_def *dyn_offset_idx = nir_iadd(b, dyn_offset_base, array_index);
1080 
1081    nir_def *dyn_load =
1082       anv_load_driver_uniform_indexed(b, 1, dynamic_offsets, dyn_offset_idx);
1083 
1084    return nir_bcsel(b, nir_ieq_imm(b, dyn_offset_base, 0xff),
1085                        nir_imm_int(b, 0), dyn_load);
1086 }
1087 
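/* Equivalent scalar logic for the select above, as a sketch only (the push
 * constant field name is an assumption):
 *
 *    offset = (dyn_offset_base == 0xff)
 *           ? 0
 *           : push.dynamic_offsets[dyn_offset_base + array_index];
 */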
1088 /** Convert a Vulkan resource index into a buffer address
1089  *
1090  * In some cases, this does a memory load from the descriptor set and, in
1091  * others, it simply converts from one form to another.
1092  *
1093  * See build_res_index for details about each resource index format.
1094  */
1095 static nir_def *
1096 build_indirect_buffer_addr_for_res_index(nir_builder *b,
1097                                          const VkDescriptorType desc_type,
1098                                          nir_def *res_index,
1099                                          nir_address_format addr_format,
1100                                          struct apply_pipeline_layout_state *state)
1101 {
1102    struct res_index_defs res = unpack_res_index(b, res_index);
1103 
1104    if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
1105       assert(addr_format == state->desc_addr_format);
1106       return build_desc_addr_for_res_index(b, desc_type, res_index,
1107                                            addr_format, state);
1108    } else if (addr_format == nir_address_format_32bit_index_offset) {
1109       return nir_vec2(b, nir_iadd(b, res.bti_idx, res.array_index),
1110                          nir_imm_int(b, 0));
1111    }
1112 
1113    nir_def *desc_addr =
1114       build_desc_addr_for_res_index(b, desc_type, res_index,
1115                                     addr_format, state);
1116 
1117    nir_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 4, 32, state);
1118 
1119    if (state->has_dynamic_buffers) {
1120       /* This shader has dynamic offsets and we have no way of knowing
1121        * (aside from the dynamic offset base index) if this buffer has a
1122        * dynamic offset.
1123        */
1124       nir_def *dyn_offset_idx =
1125          nir_iadd(b, res.dyn_offset_base, res.array_index);
1126 
1127       nir_def *dyn_load =
1128          anv_load_driver_uniform_indexed(b, 1, dynamic_offsets, dyn_offset_idx);
1129 
1130       nir_def *dynamic_offset =
1131          nir_bcsel(b, nir_ieq_imm(b, res.dyn_offset_base, 0xff),
1132                       nir_imm_int(b, 0), dyn_load);
1133 
1134       /* The dynamic offset gets added to the base pointer so that we
1135        * have a sliding window range.
1136        */
1137       nir_def *base_ptr =
1138          nir_pack_64_2x32(b, nir_trim_vector(b, desc, 2));
1139       base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
1140       desc = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
1141                          nir_unpack_64_2x32_split_y(b, base_ptr),
1142                          nir_channel(b, desc, 2),
1143                          nir_channel(b, desc, 3));
1144    }
1145 
1146    /* The last element of the vec4 is always zero.
1147     *
1148     * See also struct anv_address_range_descriptor
1149     */
1150    return nir_vec4(b, nir_channel(b, desc, 0),
1151                       nir_channel(b, desc, 1),
1152                       nir_channel(b, desc, 2),
1153                       nir_imm_int(b, 0));
1154 }
1155 
1156 static nir_def *
1157 build_direct_buffer_addr_for_res_index(nir_builder *b,
1158                                        const VkDescriptorType desc_type,
1159                                        nir_def *res_index,
1160                                        nir_address_format addr_format,
1161                                        struct apply_pipeline_layout_state *state)
1162 {
1163    if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
1164       assert(addr_format == state->desc_addr_format);
1165       return build_desc_addr_for_res_index(b, desc_type, res_index,
1166                                            addr_format, state);
1167    } else if (addr_format == nir_address_format_32bit_index_offset) {
1168       struct res_index_defs res = unpack_res_index(b, res_index);
1169 
1170       return nir_vec2(b, nir_iadd(b, res.desc_offset_base,
1171                                   nir_imul(b, res.array_index, res.desc_stride)),
1172                       nir_imm_int(b, 0));
1173    }
1174 
1175    nir_def *desc_addr =
1176       build_desc_addr_for_res_index(b, desc_type, res_index,
1177                                     addr_format, state);
1178 
1179    nir_def *addr =
1180       build_load_render_surface_state_address(b, desc_addr, state);
1181 
1182    if (state->has_dynamic_buffers) {
1183       struct res_index_defs res = unpack_res_index(b, res_index);
1184 
1185       /* This shader has dynamic offsets and we have no way of knowing (aside
1186        * from the dynamic offset base index) if this buffer has a dynamic
1187        * offset.
1188        */
1189       nir_def *dynamic_offset =
1190          build_buffer_dynamic_offset_for_res_index(
1191             b, res.dyn_offset_base, res.array_index, state);
1192 
1193       /* The dynamic offset gets added to the base pointer so that we
1194        * have a sliding window range.
1195        */
1196       nir_def *base_ptr =
1197          nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
1198       base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
1199       addr = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
1200                          nir_unpack_64_2x32_split_y(b, base_ptr),
1201                          nir_channel(b, addr, 2),
1202                          nir_channel(b, addr, 3));
1203    }
1204 
1205    /* The last element of the vec4 is always zero.
1206     *
1207     * See also struct anv_address_range_descriptor
1208     */
1209    return nir_vec4(b, nir_channel(b, addr, 0),
1210                       nir_channel(b, addr, 1),
1211                       nir_channel(b, addr, 2),
1212                       nir_imm_int(b, 0));
1213 }
1214 
1215 static nir_def *
1216 build_buffer_addr_for_res_index(nir_builder *b,
1217                                 const VkDescriptorType desc_type,
1218                                 nir_def *res_index,
1219                                 nir_address_format addr_format,
1220                                 struct apply_pipeline_layout_state *state)
1221 {
1222    if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT)
1223       return build_indirect_buffer_addr_for_res_index(b, desc_type, res_index, addr_format, state);
1224    else
1225       return build_direct_buffer_addr_for_res_index(b, desc_type, res_index, addr_format, state);
1226 }
1227 
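/* Builds a buffer address for a (set, binding) pair.  With the
 * 32bit_index_offset format this is a vec2 of (surface index, 0), or, for
 * inline uniform blocks, (descriptor set surface index, binding offset within
 * the set); other address formats fall back to
 * build_buffer_addr_for_res_index().
 */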
1228 static nir_def *
1229 build_buffer_addr_for_binding(nir_builder *b,
1230                               const VkDescriptorType desc_type,
1231                               unsigned set,
1232                               unsigned binding,
1233                               nir_def *res_index,
1234                               nir_address_format addr_format,
1235                               struct apply_pipeline_layout_state *state)
1236 {
1237    if (addr_format != nir_address_format_32bit_index_offset)
1238       return build_buffer_addr_for_res_index(b, desc_type, res_index, addr_format, state);
1239 
1240    if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
1241       const struct anv_descriptor_set_binding_layout *bind_layout =
1242          &state->layout->set[set].layout->binding[binding];
1243       return nir_vec2(b,
1244                       nir_imm_int(b, state->set[set].desc_offset),
1245                       nir_imm_int(b, bind_layout->descriptor_surface_offset));
1246    }
1247 
1248    struct res_index_defs res = unpack_res_index(b, res_index);
1249 
1250    return nir_vec2(b,
1251                    build_surface_index_for_binding(b, set, binding, res.array_index,
1252                                                    0 /* plane */,
1253                                                    false /* non_uniform */,
1254                                                    state),
1255                    nir_imm_int(b, 0));
1256 }
1257 
1258 /** Loads descriptor memory for a variable-based deref chain
1259  *
1260  * The deref chain has to terminate at a variable with a descriptor_set and
1261  * binding set.  This is used for images, textures, and samplers.
1262  */
1263 static nir_def *
1264 build_load_var_deref_surface_handle(nir_builder *b, nir_deref_instr *deref,
1265                                     bool non_uniform,
1266                                     bool *out_is_bindless,
1267                                     struct apply_pipeline_layout_state *state)
1268 {
1269    nir_variable *var = nir_deref_instr_get_variable(deref);
1270 
1271    const uint32_t set = var->data.descriptor_set;
1272    const uint32_t binding = var->data.binding;
1273 
1274    *out_is_bindless =
1275       is_binding_bindless(set, binding, false /* sampler */, state);
1276 
1277    nir_def *array_index;
1278    if (deref->deref_type != nir_deref_type_var) {
1279       assert(deref->deref_type == nir_deref_type_array);
1280       assert(nir_deref_instr_parent(deref)->deref_type == nir_deref_type_var);
1281       array_index = deref->arr.index.ssa;
1282    } else {
1283       array_index = nir_imm_int(b, 0);
1284    }
1285 
1286    return build_surface_index_for_binding(b, set, binding, array_index,
1287                                           0 /* plane */, non_uniform, state);
1288 }
1289 
1290 /** A recursive form of build_res_index()
1291  *
1292  * This recursively walks a resource [re]index chain and builds the resource
1293  * index.  It places the new code with the resource [re]index operation in the
1294  * hopes of better CSE.  This means the cursor is not where you left it when
1295  * this function returns.
1296  */
1297 static nir_def *
1298 build_res_index_for_chain(nir_builder *b, nir_intrinsic_instr *intrin,
1299                           nir_address_format addr_format,
1300                           uint32_t *set, uint32_t *binding,
1301                           struct apply_pipeline_layout_state *state)
1302 {
1303    if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) {
1304       b->cursor = nir_before_instr(&intrin->instr);
1305       *set = nir_intrinsic_desc_set(intrin);
1306       *binding = nir_intrinsic_binding(intrin);
1307       return build_res_index(b, *set, *binding, intrin->src[0].ssa, state);
1308    } else {
1309       assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex);
1310       nir_intrinsic_instr *parent = nir_src_as_intrinsic(intrin->src[0]);
1311       nir_def *index =
1312          build_res_index_for_chain(b, parent, addr_format,
1313                                    set, binding, state);
1314 
1315       b->cursor = nir_before_instr(&intrin->instr);
1316 
1317       return build_res_reindex(b, index, intrin->src[1].ssa);
1318    }
1319 }
1320 
1321 /** Builds a buffer address for a given vulkan [re]index intrinsic
1322  *
1323  * The cursor is not where you left it when this function returns.
1324  */
1325 static nir_def *
1326 build_buffer_addr_for_idx_intrin(nir_builder *b,
1327                                  nir_intrinsic_instr *idx_intrin,
1328                                  nir_address_format addr_format,
1329                                  struct apply_pipeline_layout_state *state)
1330 {
1331    uint32_t set = UINT32_MAX, binding = UINT32_MAX;
1332    nir_def *res_index =
1333       build_res_index_for_chain(b, idx_intrin, addr_format,
1334                                 &set, &binding, state);
1335 
1336    const struct anv_descriptor_set_binding_layout *bind_layout =
1337       &state->layout->set[set].layout->binding[binding];
1338 
1339    return build_buffer_addr_for_binding(b, bind_layout->type,
1340                                         set, binding, res_index,
1341                                         addr_format, state);
1342 }
1343 
1344 /** Builds a buffer address for a deref chain
1345  *
1346  * This assumes that you can chase the chain all the way back to the original
1347  * vulkan_resource_index intrinsic.
1348  *
1349  * The cursor is not where you left it when this function returns.
1350  */
1351 static nir_def *
1352 build_buffer_addr_for_deref(nir_builder *b, nir_deref_instr *deref,
1353                             nir_address_format addr_format,
1354                             struct apply_pipeline_layout_state *state)
1355 {
1356    nir_deref_instr *parent = nir_deref_instr_parent(deref);
1357    if (parent) {
1358       nir_def *addr =
1359          build_buffer_addr_for_deref(b, parent, addr_format, state);
1360 
1361       b->cursor = nir_before_instr(&deref->instr);
1362       return nir_explicit_io_address_from_deref(b, deref, addr, addr_format);
1363    }
1364 
1365    nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
1366    assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);
1367 
1368    nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);
1369 
1370    b->cursor = nir_before_instr(&deref->instr);
1371 
1372    return build_buffer_addr_for_idx_intrin(b, idx_intrin, addr_format, state);
1373 }
1374 
1375 static bool
1376 try_lower_direct_buffer_intrinsic(nir_builder *b,
1377                                   nir_intrinsic_instr *intrin, bool is_atomic,
1378                                   struct apply_pipeline_layout_state *state)
1379 {
1380    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1381    if (!nir_deref_mode_is_one_of(deref, nir_var_mem_ubo | nir_var_mem_ssbo))
1382       return false;
1383 
1384    nir_intrinsic_instr *desc = nir_deref_find_descriptor(deref, state);
1385    if (desc == NULL) {
1386       /* We should always be able to find the descriptor for UBO access. */
1387       assert(nir_deref_mode_is_one_of(deref, nir_var_mem_ssbo));
1388       return false;
1389    }
1390 
1391    const unsigned set = nir_intrinsic_desc_set(desc);
1392    const unsigned binding = nir_intrinsic_binding(desc);
1393 
1394    const struct anv_descriptor_set_binding_layout *bind_layout =
1395       &state->layout->set[set].layout->binding[binding];
1396 
1397    nir_address_format addr_format = descriptor_address_format(desc, state);
1398 
1399    /* Although we could lower non-uniform binding table accesses with
1400     * nir_opt_non_uniform_access, we might as well use an A64 message and
1401     * avoid the loops inserted by that lowering pass.
1402     */
1403    if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM)
1404       return false;
1405 
1406    if (nir_deref_mode_is(deref, nir_var_mem_ssbo)) {
1407       /* 64-bit atomics only support A64 messages so we can't lower them to
1408        * the index+offset model.
1409        */
1410       if (is_atomic && intrin->def.bit_size == 64 &&
1411           !state->pdevice->info.has_lsc)
1412          return false;
1413 
1414       /* If we don't have a BTI for this binding and we're using indirect
1415        * descriptors, we'll use A64 messages. This is handled in the main
1416        * lowering path.
1417        */
1418       if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT &&
1419           !descriptor_has_bti(desc, state))
1420          return false;
1421 
1422       /* Rewrite to 32bit_index_offset whenever we can */
1423       addr_format = nir_address_format_32bit_index_offset;
1424    } else {
1425       assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
1426 
1427       /* If we don't have a BTI for this binding and we're using indirect
1428        * descriptors, we'll use A64 messages. This is handled in the main
1429        * lowering path.
1430        *
1431        * We make an exception for uniform blocks which are built from the
1432        * descriptor set base address + offset. There is no indirect data to
1433        * fetch.
1434        */
1435       if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT &&
1436           bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK &&
1437           !descriptor_has_bti(desc, state))
1438          return false;
1439 
1440       /* If this is an inline uniform and the shader stage is bindless, we
1441        * can't switch to 32bit_index_offset.
1442        */
1443       if (bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ||
1444           !brw_shader_stage_requires_bindless_resources(b->shader->info.stage))
1445          addr_format = nir_address_format_32bit_index_offset;
1446    }
1447 
1448    /* If a dynamic buffer binding has not been assigned a binding table
1449     * entry, we need to bail here.
1450     */
1451    if (vk_descriptor_type_is_dynamic(bind_layout->type) &&
1452        !descriptor_has_bti(desc, state))
1453       return false;
1454 
1455    nir_def *addr =
1456       build_buffer_addr_for_deref(b, deref, addr_format, state);
1457 
1458    b->cursor = nir_before_instr(&intrin->instr);
1459    nir_lower_explicit_io_instr(b, intrin, addr, addr_format);
1460 
1461    return true;
1462 }
1463 
1464 static bool
1465 lower_load_accel_struct_desc(nir_builder *b,
1466                              nir_intrinsic_instr *load_desc,
1467                              struct apply_pipeline_layout_state *state)
1468 {
1469    assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);
1470 
1471    nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);
1472 
1473    /* It doesn't really matter what address format we choose as
1474     * everything will constant-fold nicely.  Choose one that uses the
1475     * actual descriptor buffer.
1476     */
1477    const nir_address_format addr_format =
1478       nir_address_format_64bit_bounded_global;
1479 
1480    uint32_t set = UINT32_MAX, binding = UINT32_MAX;
1481    nir_def *res_index =
1482       build_res_index_for_chain(b, idx_intrin, addr_format,
1483                                 &set, &binding, state);
1484 
1485    b->cursor = nir_before_instr(&load_desc->instr);
1486 
1487    struct res_index_defs res = unpack_res_index(b, res_index);
1488    nir_def *desc_addr =
1489       build_desc_addr_for_binding(b, set, binding, res.array_index,
1490                                   0 /* plane */, state);
1491 
1492    /* Acceleration structure descriptors are always uint64_t */
1493    nir_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 1, 64, state);
1494 
1495    assert(load_desc->def.bit_size == 64);
1496    assert(load_desc->def.num_components == 1);
1497    nir_def_replace(&load_desc->def, desc);
1498 
1499    return true;
1500 }
1501 
1502 static bool
1503 lower_direct_buffer_instr(nir_builder *b, nir_instr *instr, void *_state)
1504 {
1505    struct apply_pipeline_layout_state *state = _state;
1506 
1507    if (instr->type != nir_instr_type_intrinsic)
1508       return false;
1509 
1510    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1511    switch (intrin->intrinsic) {
1512    case nir_intrinsic_load_deref:
1513    case nir_intrinsic_store_deref:
1514       return try_lower_direct_buffer_intrinsic(b, intrin, false, state);
1515 
1516    case nir_intrinsic_deref_atomic:
1517    case nir_intrinsic_deref_atomic_swap:
1518       return try_lower_direct_buffer_intrinsic(b, intrin, true, state);
1519 
1520    case nir_intrinsic_get_ssbo_size: {
1521       /* The get_ssbo_size intrinsic always just takes an
1522        * index/reindex intrinsic.
1523        */
1524       nir_intrinsic_instr *idx_intrin =
1525          find_descriptor_for_index_src(intrin->src[0], state);
1526       if (idx_intrin == NULL)
1527          return false;
1528 
1529       /* We just checked that this is a BTI descriptor */
1530       const nir_address_format addr_format =
1531          nir_address_format_32bit_index_offset;
1532 
1533       b->cursor = nir_before_instr(&intrin->instr);
1534 
1535       uint32_t set = UINT32_MAX, binding = UINT32_MAX;
1536       nir_def *res_index =
1537          build_res_index_for_chain(b, idx_intrin, addr_format,
1538                                    &set, &binding, state);
1539 
1540       bool non_uniform = nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM;
1541 
1542       nir_def *surface_index =
1543          build_surface_index_for_binding(b, set, binding,
1544                                          nir_channel(b, res_index, 3),
1545                                          0 /* plane */,
1546                                          non_uniform,
1547                                          state);
1548 
1549       nir_src_rewrite(&intrin->src[0], surface_index);
1550       _mesa_set_add(state->lowered_instrs, intrin);
1551       return true;
1552    }
1553 
1554    case nir_intrinsic_load_vulkan_descriptor:
1555       if (nir_intrinsic_desc_type(intrin) ==
1556           VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
1557          return lower_load_accel_struct_desc(b, intrin, state);
1558       return false;
1559 
1560    default:
1561       return false;
1562    }
1563 }
1564 
1565 static bool
1566 lower_res_index_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
1567                           struct apply_pipeline_layout_state *state)
1568 {
1569    b->cursor = nir_before_instr(&intrin->instr);
1570 
1571    nir_def *index =
1572       build_res_index(b, nir_intrinsic_desc_set(intrin),
1573                          nir_intrinsic_binding(intrin),
1574                          intrin->src[0].ssa,
1575                          state);
1576 
1577    assert(intrin->def.bit_size == index->bit_size);
1578    assert(intrin->def.num_components == index->num_components);
1579    nir_def_replace(&intrin->def, index);
1580 
1581    return true;
1582 }
1583 
1584 static bool
1585 lower_res_reindex_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
1586                             struct apply_pipeline_layout_state *state)
1587 {
1588    b->cursor = nir_before_instr(&intrin->instr);
1589 
1590    nir_def *index =
1591       build_res_reindex(b, intrin->src[0].ssa,
1592                            intrin->src[1].ssa);
1593 
1594    assert(intrin->def.bit_size == index->bit_size);
1595    assert(intrin->def.num_components == index->num_components);
1596    nir_def_replace(&intrin->def, index);
1597 
1598    return true;
1599 }
1600 
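/* Lowers load_vulkan_descriptor to a buffer address in the address format
 * selected for the descriptor type (see addr_format_for_desc_type).
 */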
1601 static bool
1602 lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
1603                              struct apply_pipeline_layout_state *state)
1604 {
1605    b->cursor = nir_before_instr(&intrin->instr);
1606 
1607    const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
1608    nir_address_format addr_format = addr_format_for_desc_type(desc_type, state);
1609 
1610    nir_def *desc =
1611       build_buffer_addr_for_res_index(b,
1612                                       desc_type, intrin->src[0].ssa,
1613                                       addr_format, state);
1614 
1615    assert(intrin->def.bit_size == desc->bit_size);
1616    assert(intrin->def.num_components == desc->num_components);
1617    nir_def_replace(&intrin->def, desc);
1618 
1619    return true;
1620 }
1621 
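/* Lowers get_ssbo_size by reading the buffer size either from the
 * anv_address_range_descriptor (indirect layouts) or from the
 * RENDER_SURFACE_STATE (direct layouts).  Instructions already rewritten by
 * the direct-buffer pre-pass are skipped.
 */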
1622 static bool
1623 lower_get_ssbo_size(nir_builder *b, nir_intrinsic_instr *intrin,
1624                     struct apply_pipeline_layout_state *state)
1625 {
1626    if (_mesa_set_search(state->lowered_instrs, intrin))
1627       return false;
1628 
1629    b->cursor = nir_before_instr(&intrin->instr);
1630 
1631    const nir_address_format addr_format =
1632       nir_address_format_64bit_bounded_global;
1633 
1634    nir_def *desc_addr =
1635       nir_build_addr_iadd_imm(
1636          b,
1637          build_desc_addr_for_res_index(b,
1638                                        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1639                                        intrin->src[0].ssa,
1640                                        addr_format, state),
1641          addr_format,
1642          nir_var_mem_ssbo,
1643          state->pdevice->isl_dev.ss.size);
1644 
1645    nir_def *desc_range;
1646    if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) {
1647       /* Load the anv_address_range_descriptor */
1648       desc_range =
1649          build_load_descriptor_mem(b, desc_addr, 0, 4, 32, state);
1650    } else {
1651       /* Build a vec4 similar to anv_address_range_descriptor using the
1652        * RENDER_SURFACE_STATE.
1653        */
1654       desc_range =
1655          build_load_render_surface_state_address(b, desc_addr, state);
1656    }
1657 
1658    nir_def *size = nir_channel(b, desc_range, 2);
1659    nir_def_replace(&intrin->def, size);
1660 
1661    return true;
1662 }
1663 
1664 static bool
1665 lower_image_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
1666                       struct apply_pipeline_layout_state *state)
1667 {
1668    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1669 
1670    b->cursor = nir_before_instr(&intrin->instr);
1671 
1672    bool non_uniform = nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM;
1673    bool is_bindless;
1674    nir_def *handle =
1675       build_load_var_deref_surface_handle(b, deref, non_uniform,
1676                                           &is_bindless, state);
1677    nir_rewrite_image_intrinsic(intrin, handle, is_bindless);
1678 
1679    return true;
1680 }
1681 
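/* Lowers image_deref_size.  For 3D storage images the depth component of the
 * result is replaced with a value derived from the descriptor (see
 * build_load_storage_3d_image_depth); other dimensionalities go through the
 * regular image intrinsic lowering.
 */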
1682 static bool
1683 lower_image_size_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
1684                            struct apply_pipeline_layout_state *state)
1685 {
1686    if (nir_intrinsic_image_dim(intrin) != GLSL_SAMPLER_DIM_3D)
1687       return lower_image_intrinsic(b, intrin, state);
1688 
1689    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1690 
1691    b->cursor = nir_before_instr(&intrin->instr);
1692 
1693    bool non_uniform = nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM;
1694    bool is_bindless;
1695    nir_def *handle =
1696       build_load_var_deref_surface_handle(b, deref, non_uniform,
1697                                           &is_bindless, state);
1698    nir_rewrite_image_intrinsic(intrin, handle, is_bindless);
1699 
1700    nir_variable *var = nir_deref_instr_get_variable(deref);
1701    const uint32_t set = var->data.descriptor_set;
1702    const uint32_t binding = var->data.binding;
1703 
1704    nir_def *array_index;
1705    if (deref->deref_type != nir_deref_type_var) {
1706       assert(deref->deref_type == nir_deref_type_array);
1707       assert(nir_deref_instr_parent(deref)->deref_type == nir_deref_type_var);
1708       array_index = deref->arr.index.ssa;
1709    } else {
1710       array_index = nir_imm_int(b, 0);
1711    }
1712 
1713    nir_def *desc_addr = build_desc_addr_for_binding(
1714       b, set, binding, array_index, 0 /* plane */, state);
1715 
1716    b->cursor = nir_after_instr(&intrin->instr);
1717 
1718    nir_def *image_depth =
1719       build_load_storage_3d_image_depth(b, desc_addr,
1720                                         nir_channel(b, &intrin->def, 2),
1721                                         state);
1722 
1723    nir_def *comps[4] = {};
1724    for (unsigned c = 0; c < intrin->def.num_components; c++)
1725       comps[c] = c == 2 ? image_depth : nir_channel(b, &intrin->def, c);
1726 
1727    nir_def *vec = nir_vec(b, comps, intrin->def.num_components);
1728    nir_def_rewrite_uses_after(&intrin->def, vec, vec->parent_instr);
1729 
1730    return true;
1731 }
1732 
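/* Lowers load_constant to a 64-bit global constant load from the shader's
 * constant data section.  The address is built from relocation constants and
 * the offset is clamped so the load stays within constant_data_size.
 */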
1733 static bool
1734 lower_load_constant(nir_builder *b, nir_intrinsic_instr *intrin,
1735                     struct apply_pipeline_layout_state *state)
1736 {
1737    b->cursor = nir_instr_remove(&intrin->instr);
1738 
1739    /* Any constant-offset load_constant instructions should have been removed
1740     * by constant folding.
1741     */
1742    assert(!nir_src_is_const(intrin->src[0]));
1743    nir_def *offset = nir_iadd_imm(b, intrin->src[0].ssa,
1744                                       nir_intrinsic_base(intrin));
1745 
1746    unsigned load_size = intrin->def.num_components *
1747                         intrin->def.bit_size / 8;
1748    unsigned load_align = intrin->def.bit_size / 8;
1749 
1750    assert(load_size < b->shader->constant_data_size);
1751    unsigned max_offset = b->shader->constant_data_size - load_size;
1752    offset = nir_umin(b, offset, nir_imm_int(b, max_offset));
1753 
1754    nir_def *const_data_addr = nir_pack_64_2x32_split(b,
1755       nir_iadd(b,
1756          nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
1757          offset),
1758       nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
1759 
1760    nir_def *data =
1761       nir_load_global_constant(b, const_data_addr,
1762                                load_align,
1763                                intrin->def.num_components,
1764                                intrin->def.bit_size);
1765 
1766    nir_def_rewrite_uses(&intrin->def, data);
1767 
1768    return true;
1769 }
1770 
1771 static bool
1772 lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
1773                         struct apply_pipeline_layout_state *state)
1774 {
1775    b->cursor = nir_instr_remove(&intrin->instr);
1776 
1777    nir_def *base_workgroup_id =
1778       anv_load_driver_uniform(b, 3, cs.base_work_group_id[0]);
1779    nir_def_rewrite_uses(&intrin->def, base_workgroup_id);
1780 
1781    return true;
1782 }
1783 
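/* Rewrites a texture or sampler deref source into either a binding table
 * offset or a bindless handle, depending on how the binding was laid out.
 */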
1784 static void
1785 lower_tex_deref(nir_builder *b, nir_tex_instr *tex,
1786                 nir_tex_src_type deref_src_type,
1787                 unsigned base_index, unsigned plane,
1788                 struct apply_pipeline_layout_state *state)
1789 {
1790    int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
1791    if (deref_src_idx < 0)
1792       return;
1793 
1794    nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
1795    nir_variable *var = nir_deref_instr_get_variable(deref);
1796 
1797    const bool is_sampler = deref_src_type == nir_tex_src_sampler_deref;
1798    const unsigned set = var->data.descriptor_set;
1799    const unsigned binding = var->data.binding;
1800    const bool bindless = is_binding_bindless(set, binding, is_sampler, state);
1801 
1802    nir_def *array_index = NULL;
1803    if (deref->deref_type != nir_deref_type_var) {
1804       assert(deref->deref_type == nir_deref_type_array);
1805 
1806       array_index = deref->arr.index.ssa;
1807    } else {
1808       array_index = nir_imm_int(b, 0);
1809    }
1810 
1811    nir_tex_src_type offset_src_type;
1812    nir_def *index;
1813    if (deref_src_type == nir_tex_src_texture_deref) {
1814       index = build_surface_index_for_binding(b, set, binding, array_index,
1815                                               plane,
1816                                               tex->texture_non_uniform,
1817                                               state);
1818       offset_src_type = bindless ?
1819                         nir_tex_src_texture_handle :
1820                         nir_tex_src_texture_offset;
1821    } else {
1822       assert(deref_src_type == nir_tex_src_sampler_deref);
1823 
1824       index = build_sampler_handle_for_binding(b, set, binding, array_index,
1825                                                plane,
1826                                                tex->sampler_non_uniform,
1827                                                state);
1828       offset_src_type = bindless ?
1829                         nir_tex_src_sampler_handle :
1830                         nir_tex_src_sampler_offset;
1831    }
1832 
1833    nir_src_rewrite(&tex->src[deref_src_idx].src, index);
1834    tex->src[deref_src_idx].src_type = offset_src_type;
1835 }
1836 
1837 static uint32_t
1838 tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
1839 {
1840    int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
1841    if (plane_src_idx < 0)
1842       return 0;
1843 
1844    unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src);
1845 
1846    nir_tex_instr_remove_src(tex, plane_src_idx);
1847 
1848    return plane;
1849 }
1850 
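/* Selects srcs[idx] out of srcs[start..end) by building a binary tree of
 * bcsel instructions.
 */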
1851 static nir_def *
1852 build_def_array_select(nir_builder *b, nir_def **srcs, nir_def *idx,
1853                        unsigned start, unsigned end)
1854 {
1855    if (start == end - 1) {
1856       return srcs[start];
1857    } else {
1858       unsigned mid = start + (end - start) / 2;
1859       return nir_bcsel(b, nir_ilt_imm(b, idx, mid),
1860                        build_def_array_select(b, srcs, idx, start, mid),
1861                        build_def_array_select(b, srcs, idx, mid, end));
1862    }
1863 }
1864 
1865 static bool
1866 lower_tex(nir_builder *b, nir_tex_instr *tex,
1867           struct apply_pipeline_layout_state *state)
1868 {
1869    unsigned plane = tex_instr_get_and_remove_plane_src(tex);
1870 
1871    b->cursor = nir_before_instr(&tex->instr);
1872 
1873    lower_tex_deref(b, tex, nir_tex_src_texture_deref,
1874                    tex->texture_index, plane, state);
1875    lower_tex_deref(b, tex, nir_tex_src_sampler_deref,
1876                    tex->sampler_index, plane, state);
1877 
1878    /* The whole lot will be embedded in the offset/handle source */
1879    tex->texture_index = 0;
1880    tex->sampler_index = 0;
1881 
1882    return true;
1883 }
1884 
1885 static bool
1886 lower_ray_query_globals(nir_builder *b, nir_intrinsic_instr *intrin,
1887                         struct apply_pipeline_layout_state *state)
1888 {
1889    b->cursor = nir_instr_remove(&intrin->instr);
1890 
1891    nir_def *rq_globals = anv_load_driver_uniform(b, 1, ray_query_globals);
1892    nir_def_rewrite_uses(&intrin->def, rq_globals);
1893 
1894    return true;
1895 }
1896 
1897 static bool
1898 lower_num_workgroups(nir_builder *b, nir_intrinsic_instr *intrin,
1899                      struct apply_pipeline_layout_state *state)
1900 {
1901    /* For mesh stages, the HW generates these values through payload registers. */
1902    if (gl_shader_stage_is_mesh(b->shader->info.stage))
1903       return false;
1904 
1905    b->cursor = nir_instr_remove(&intrin->instr);
1906    nir_def *num_workgroups;
1907    /* On Gfx12.5+ we use the inline register to push the values; on prior
1908     * generations we use push constants.
1909     */
1910    if (state->pdevice->info.verx10 >= 125) {
1911       num_workgroups =
1912          nir_load_inline_data_intel(
1913             b, 3, 32,
1914             .base = ANV_INLINE_PARAM_NUM_WORKGROUPS_OFFSET);
1915    } else {
1916       num_workgroups =
1917          anv_load_driver_uniform(b, 3, cs.num_work_groups[0]);
1918    }
1919 
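   /* When the first component is UINT32_MAX (the indirect dispatch case),
    * components 1 and 2 hold the 64-bit address from which the actual group
    * counts are loaded.
    */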
1920    nir_def *num_workgroups_indirect;
1921    nir_push_if(b, nir_ieq_imm(b, nir_channel(b, num_workgroups, 0), UINT32_MAX));
1922    {
1923       nir_def *addr = nir_pack_64_2x32_split(b,
1924                                              nir_channel(b, num_workgroups, 1),
1925                                              nir_channel(b, num_workgroups, 2));
1926       num_workgroups_indirect = nir_load_global_constant(b, addr, 4, 3, 32);
1927    }
1928    nir_pop_if(b, NULL);
1929 
1930    num_workgroups = nir_if_phi(b, num_workgroups_indirect, num_workgroups);
1931    nir_def_rewrite_uses(&intrin->def, num_workgroups);
1932 
1933    return true;
1934 }
1935 
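/* Main per-instruction lowering callback: dispatches descriptor-related
 * intrinsics and texture instructions to the lowering helpers above.
 */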
1936 static bool
1937 apply_pipeline_layout(nir_builder *b, nir_instr *instr, void *_state)
1938 {
1939    struct apply_pipeline_layout_state *state = _state;
1940 
1941    switch (instr->type) {
1942    case nir_instr_type_intrinsic: {
1943       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1944       switch (intrin->intrinsic) {
1945       case nir_intrinsic_vulkan_resource_index:
1946          return lower_res_index_intrinsic(b, intrin, state);
1947       case nir_intrinsic_vulkan_resource_reindex:
1948          return lower_res_reindex_intrinsic(b, intrin, state);
1949       case nir_intrinsic_load_vulkan_descriptor:
1950          return lower_load_vulkan_descriptor(b, intrin, state);
1951       case nir_intrinsic_get_ssbo_size:
1952          return lower_get_ssbo_size(b, intrin, state);
1953       case nir_intrinsic_image_deref_load:
1954       case nir_intrinsic_image_deref_store:
1955       case nir_intrinsic_image_deref_atomic:
1956       case nir_intrinsic_image_deref_atomic_swap:
1957       case nir_intrinsic_image_deref_samples:
1958       case nir_intrinsic_image_deref_load_param_intel:
1959       case nir_intrinsic_image_deref_load_raw_intel:
1960       case nir_intrinsic_image_deref_store_raw_intel:
1961       case nir_intrinsic_image_deref_sparse_load:
1962          return lower_image_intrinsic(b, intrin, state);
1963       case nir_intrinsic_image_deref_size:
1964          return lower_image_size_intrinsic(b, intrin, state);
1965       case nir_intrinsic_load_constant:
1966          return lower_load_constant(b, intrin, state);
1967       case nir_intrinsic_load_base_workgroup_id:
1968          return lower_base_workgroup_id(b, intrin, state);
1969       case nir_intrinsic_load_ray_query_global_intel:
1970          return lower_ray_query_globals(b, intrin, state);
1971       case nir_intrinsic_load_num_workgroups:
1972          return lower_num_workgroups(b, intrin, state);
1973       default:
1974          return false;
1975       }
1976       break;
1977    }
1978    case nir_instr_type_tex:
1979       return lower_tex(b, nir_instr_as_tex(instr), state);
1980    default:
1981       return false;
1982    }
1983 }
1984 
1985 struct binding_info {
1986    uint32_t binding;
1987    uint8_t set;
1988    uint16_t score;
1989 };
1990 
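/* qsort comparator: highest score first; ties are broken by set then binding
 * so the order is deterministic.
 */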
1991 static int
1992 compare_binding_infos(const void *_a, const void *_b)
1993 {
1994    const struct binding_info *a = _a, *b = _b;
1995    if (a->score != b->score)
1996       return b->score - a->score;
1997 
1998    if (a->set != b->set)
1999       return a->set - b->set;
2000 
2001    return a->binding - b->binding;
2002 }
2003 
2004 #ifndef NDEBUG
2005 static void
2006 anv_validate_pipeline_layout(const struct anv_pipeline_sets_layout *layout,
2007                              nir_shader *shader)
2008 {
2009    nir_foreach_function_impl(impl, shader) {
2010       nir_foreach_block(block, impl) {
2011          nir_foreach_instr(instr, block) {
2012             if (instr->type != nir_instr_type_intrinsic)
2013                continue;
2014 
2015             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2016             if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
2017                continue;
2018 
2019             unsigned set = nir_intrinsic_desc_set(intrin);
2020             assert(layout->set[set].layout);
2021          }
2022       }
2023    }
2024 }
2025 #endif
2026 
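/* Bindings from push descriptor sets are always promotable to push constants;
 * bindings from descriptor-buffer or embedded-immutable-samplers sets never
 * are; anything else is promotable unless it has one of the non-pushable
 * binding flags.
 */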
2027 static bool
2028 binding_is_promotable_to_push(const struct anv_descriptor_set_layout *set_layout,
2029                               const struct anv_descriptor_set_binding_layout *bind_layout)
2030 {
2031    if (set_layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)
2032       return true;
2033 
2034    if (set_layout->flags & (VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT |
2035                             VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT))
2036       return false;
2037 
2038    return (bind_layout->flags & non_pushable_binding_flags) == 0;
2039 }
2040 
2041 static void
2042 add_null_bti_entry(struct anv_pipeline_bind_map *map)
2043 {
2044    map->surface_to_descriptor[map->surface_count++] =
2045       (struct anv_pipeline_binding) {
2046          .set = ANV_DESCRIPTOR_SET_NULL,
2047    };
2048    assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
2049 }
2050 
2051 static void
2052 add_bti_entry(struct anv_pipeline_bind_map *map,
2053               uint32_t set,
2054               uint32_t binding,
2055               uint32_t element,
2056               uint32_t plane,
2057               const struct anv_descriptor_set_binding_layout *bind_layout)
2058 {
2059    map->surface_to_descriptor[map->surface_count++] =
2060       (struct anv_pipeline_binding) {
2061          .set = set,
2062          .binding = binding,
2063          .index = bind_layout->descriptor_index + element,
2064          .set_offset = bind_layout->descriptor_surface_offset +
2065                        element * bind_layout->descriptor_surface_stride +
2066                        plane * bind_layout->descriptor_data_surface_size,
2067          .plane = plane,
2068    };
2069    assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
2070 }
2071 
2072 static void
2073 add_dynamic_bti_entry(struct anv_pipeline_bind_map *map,
2074                       uint32_t set,
2075                       uint32_t binding,
2076                       uint32_t element,
2077                       const struct anv_pipeline_sets_layout *layout,
2078                       const struct anv_descriptor_set_binding_layout *bind_layout)
2079 {
2080    map->surface_to_descriptor[map->surface_count++] =
2081       (struct anv_pipeline_binding) {
2082          .set = set,
2083          .binding = binding,
2084          .index = bind_layout->descriptor_index + element,
2085          .set_offset = bind_layout->descriptor_surface_offset +
2086                        element * bind_layout->descriptor_surface_stride,
2087          .dynamic_offset_index = bind_layout->dynamic_offset_index + element,
2088    };
2089    assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
2090 }
2091 
2092 static void
2093 add_sampler_entry(struct anv_pipeline_bind_map *map,
2094                   uint32_t set,
2095                   uint32_t binding,
2096                   uint32_t element,
2097                   uint32_t plane,
2098                   const struct anv_pipeline_sets_layout *layout,
2099                   const struct anv_descriptor_set_binding_layout *bind_layout)
2100 {
2101    assert((bind_layout->descriptor_index + element) < layout->set[set].layout->descriptor_count);
2102    map->sampler_to_descriptor[map->sampler_count++] =
2103       (struct anv_pipeline_binding) {
2104          .set = set,
2105          .binding = binding,
2106          .index = bind_layout->descriptor_index + element,
2107          .plane = plane,
2108    };
2109 }
2110 
2111 static void
2112 add_push_entry(struct anv_pipeline_push_map *push_map,
2113                uint32_t set,
2114                uint32_t binding,
2115                uint32_t element,
2116                const struct anv_pipeline_sets_layout *layout,
2117                const struct anv_descriptor_set_binding_layout *bind_layout)
2118 {
2119    push_map->block_to_descriptor[push_map->block_count++] =
2120       (struct anv_pipeline_binding) {
2121          .set = set,
2122          .binding = binding,
2123          .index = bind_layout->descriptor_index + element,
2124          .dynamic_offset_index = bind_layout->dynamic_offset_index + element,
2125    };
2126 }
2127 
2128 static void
2129 add_embedded_sampler_entry(struct apply_pipeline_layout_state *state,
2130                            struct anv_pipeline_bind_map *map,
2131                            uint32_t set, uint32_t binding)
2132 {
2133    state->set[set].binding[binding].embedded_sampler_index =
2134       map->embedded_sampler_count;
2135    struct anv_pipeline_embedded_sampler_binding *sampler =
2136       &map->embedded_sampler_to_binding[map->embedded_sampler_count++];
2137    const struct anv_descriptor_set_layout *set_layout =
2138       state->layout->set[set].layout;
2139    const struct anv_descriptor_set_binding_layout *bind_layout =
2140       &set_layout->binding[binding];
2141 
2142    *sampler = (struct anv_pipeline_embedded_sampler_binding) {
2143       .set = set,
2144       .binding = binding,
2145    };
2146 
2147    assert(sizeof(sampler->key.sampler) ==
2148           sizeof(bind_layout->immutable_samplers[0]->state_no_bc[0]));
2149    memcpy(sampler->key.sampler,
2150           bind_layout->immutable_samplers[0]->state_no_bc[0],
2151           sizeof(sampler->key.sampler));
2152 
2153    assert(sizeof(sampler->key.color) ==
2154           sizeof(bind_layout->immutable_samplers[0]->vk.border_color_value.uint32));
2155    memcpy(sampler->key.color,
2156           bind_layout->immutable_samplers[0]->vk.border_color_value.uint32,
2157           sizeof(sampler->key.color));
2158 }
2159 
2160 static bool
2161 binding_should_use_surface_binding_table(const struct apply_pipeline_layout_state *state,
2162                                          const struct anv_descriptor_set_binding_layout *bind_layout,
2163                                          uint32_t set, uint32_t binding)
2164 {
2165    if ((bind_layout->data & ANV_DESCRIPTOR_BTI_SURFACE_STATE) == 0)
2166       return false;
2167 
2168    if (state->pdevice->always_use_bindless &&
2169        (bind_layout->data & ANV_DESCRIPTOR_SURFACE))
2170       return false;
2171 
2172    if (state->set[set].binding[binding].properties &
2173        BINDING_PROPERTY_NO_BINDING_TABLE)
2174       return false;
2175 
2176    return true;
2177 }
2178 
2179 static bool
2180 binding_should_use_sampler_binding_table(const struct apply_pipeline_layout_state *state,
2181                                          const struct anv_descriptor_set_binding_layout *binding)
2182 {
2183    if ((binding->data & ANV_DESCRIPTOR_BTI_SAMPLER_STATE) == 0)
2184       return false;
2185 
2186    if (state->pdevice->always_use_bindless &&
2187        (binding->data & ANV_DESCRIPTOR_SAMPLER))
2188       return false;
2189 
2190    return true;
2191 }
2192 
2193 void
2194 anv_nir_apply_pipeline_layout(nir_shader *shader,
2195                               const struct anv_physical_device *pdevice,
2196                               enum brw_robustness_flags robust_flags,
2197                               bool independent_sets,
2198                               const struct anv_pipeline_sets_layout *layout,
2199                               struct anv_pipeline_bind_map *map,
2200                               struct anv_pipeline_push_map *push_map,
2201                               void *push_map_mem_ctx)
2202 {
2203    void *mem_ctx = ralloc_context(NULL);
2204 
2205 #ifndef NDEBUG
2206    /* We should not have any reference to a descriptor set that is not
2207     * given through the pipeline layout (layout->set[set].layout == NULL).
2208     */
2209    anv_validate_pipeline_layout(layout, shader);
2210 #endif
2211 
2212    const bool bindless_stage =
2213       brw_shader_stage_requires_bindless_resources(shader->info.stage);
2214    struct apply_pipeline_layout_state state = {
2215       .pdevice = pdevice,
2216       .layout = layout,
2217       .desc_addr_format = bindless_stage ?
2218                           nir_address_format_64bit_global_32bit_offset :
2219                           nir_address_format_32bit_index_offset,
2220       .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_flags),
2221       .ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_flags),
2222       .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
2223       .has_independent_sets = independent_sets,
2224    };
2225 
2226    /* Compute the number of push block entries required. */
2227    unsigned push_block_count = 0;
2228    for (unsigned s = 0; s < layout->num_sets; s++) {
2229       if (!layout->set[s].layout)
2230          continue;
2231 
2232       const unsigned count = layout->set[s].layout->binding_count;
2233       state.set[s].binding = rzalloc_array_size(mem_ctx, sizeof(state.set[s].binding[0]), count);
2234 
2235       const struct anv_descriptor_set_layout *set_layout = layout->set[s].layout;
2236       for (unsigned b = 0; b < set_layout->binding_count; b++) {
2237          if (set_layout->binding[b].type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
2238             push_block_count += set_layout->binding[b].array_size;
2239       }
2240    }
2241 
2242    /* Find all used sets/bindings */
2243    nir_shader_instructions_pass(shader, get_used_bindings,
2244                                 nir_metadata_all, &state);
2245 
2246    /* Assign a BTI to each used descriptor set */
2247    for (unsigned s = 0; s < layout->num_sets; s++) {
2248       if (state.desc_addr_format != nir_address_format_32bit_index_offset) {
2249          state.set[s].desc_offset = BINDLESS_OFFSET;
2250       } else if (state.set[s].desc_buffer_used) {
2251          map->surface_to_descriptor[map->surface_count] =
2252             (struct anv_pipeline_binding) {
2253                .set = (layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER) ?
2254                       ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER :
2255                       ANV_DESCRIPTOR_SET_DESCRIPTORS,
2256                .binding = UINT32_MAX,
2257                .index = s,
2258             };
2259          state.set[s].desc_offset = map->surface_count++;
2260       }
2261    }
2262 
2263    /* Assign a block index for each surface */
2264    push_map->block_to_descriptor =
2265       rzalloc_array(push_map_mem_ctx, struct anv_pipeline_binding,
2266                     map->surface_count + push_block_count);
2267 
2268    memcpy(push_map->block_to_descriptor,
2269           map->surface_to_descriptor,
2270           sizeof(push_map->block_to_descriptor[0]) * map->surface_count);
2271    push_map->block_count = map->surface_count;
2272 
2273    /* Count used bindings, assign embedded sampler indices & add push blocks
2274     * for promotion to push constants
2275     */
2276    unsigned used_binding_count = 0;
2277    for (uint32_t set = 0; set < layout->num_sets; set++) {
2278       struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
2279       if (!set_layout)
2280          continue;
2281 
2282       for (unsigned b = 0; b < set_layout->binding_count; b++) {
2283          if (state.set[set].binding[b].use_count == 0)
2284             continue;
2285 
2286          used_binding_count++;
2287 
2288          const struct anv_descriptor_set_binding_layout *bind_layout =
2289             &set_layout->binding[b];
2290 
2291          if (state.set[set].binding[b].properties & BINDING_PROPERTY_EMBEDDED_SAMPLER)
2292             add_embedded_sampler_entry(&state, map, set, b);
2293 
2294          if (binding_is_promotable_to_push(set_layout, bind_layout)) {
2295             if (bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
2296                state.set[set].binding[b].push_block = push_map->block_count;
2297                for (unsigned i = 0; i < bind_layout->array_size; i++)
2298                   add_push_entry(push_map, set, b, i, layout, bind_layout);
2299             } else {
2300                state.set[set].binding[b].push_block = state.set[set].desc_offset;
2301             }
2302          }
2303       }
2304    }
2305 
2306    struct binding_info *infos =
2307       rzalloc_array(mem_ctx, struct binding_info, used_binding_count);
2308    used_binding_count = 0;
2309    for (uint32_t set = 0; set < layout->num_sets; set++) {
2310       const struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
2311       if (!set_layout)
2312          continue;
2313 
2314       for (unsigned b = 0; b < set_layout->binding_count; b++) {
2315          if (state.set[set].binding[b].use_count == 0)
2316             continue;
2317 
2318          const struct anv_descriptor_set_binding_layout *binding =
2319                &layout->set[set].layout->binding[b];
2320 
2321          /* Do a fixed-point calculation to generate a score based on the
2322           * number of uses and the binding array size.  We shift by 7 instead
2323           * of 8 because we're going to use the top bit below to make
2324           * everything which does not support bindless strictly higher priority
2325           * than things which do.
2326           */
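         /* For example, a binding used 4 times with array_size 2 scores
          * (4 << 7) / 2 = 256 before the bindless bit is applied.
          */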
2327          uint16_t score = ((uint16_t)state.set[set].binding[b].use_count << 7) /
2328                           binding->array_size;
2329 
2330          /* If the descriptor type doesn't support bindless then put it at the
2331           * beginning so we guarantee it gets a slot.
2332           */
2333          if (!anv_descriptor_supports_bindless(pdevice, set_layout, binding))
2334             score |= 1 << 15;
2335 
2336          infos[used_binding_count++] = (struct binding_info) {
2337             .set = set,
2338             .binding = b,
2339             .score = score,
2340          };
2341       }
2342    }
2343 
2344    /* Order the binding infos based on score with highest scores first.  If
2345     * scores are equal we then order by set and binding.
2346     */
2347    qsort(infos, used_binding_count, sizeof(struct binding_info),
2348          compare_binding_infos);
2349 
2350    for (unsigned i = 0; i < used_binding_count; i++) {
2351       unsigned set = infos[i].set, b = infos[i].binding;
2352       assert(layout->set[set].layout);
2353       const struct anv_descriptor_set_layout *set_layout =
2354          layout->set[set].layout;
2355       const struct anv_descriptor_set_binding_layout *binding =
2356             &set_layout->binding[b];
2357 
2358       const uint32_t array_size = binding->array_size;
2359 
2360       if (binding->dynamic_offset_index >= 0)
2361          state.has_dynamic_buffers = true;
2362 
2363       const unsigned array_multiplier = bti_multiplier(&state, set, b);
2364       assert(array_multiplier >= 1);
2365 
2366       /* Assume bindless by default */
2367       state.set[set].binding[b].surface_offset = BINDLESS_OFFSET;
2368       state.set[set].binding[b].sampler_offset = BINDLESS_OFFSET;
2369 
2370       if (binding_should_use_surface_binding_table(&state, binding, set, b)) {
2371          if (map->surface_count + array_size * array_multiplier > MAX_BINDING_TABLE_SIZE ||
2372              anv_descriptor_requires_bindless(pdevice, set_layout, binding) ||
2373              brw_shader_stage_requires_bindless_resources(shader->info.stage)) {
2374             /* If this descriptor doesn't fit in the binding table or if it
2375              * requires bindless for some reason, flag it as bindless.
2376              */
2377             assert(anv_descriptor_supports_bindless(pdevice, set_layout, binding));
2378          } else {
2379             state.set[set].binding[b].surface_offset = map->surface_count;
2380             if (binding->dynamic_offset_index < 0) {
2381                struct anv_sampler **samplers = binding->immutable_samplers;
2382                uint8_t max_planes = bti_multiplier(&state, set, b);
2383                for (unsigned i = 0; i < binding->array_size; i++) {
2384                   uint8_t planes = samplers ? samplers[i]->n_planes : 1;
2385                   for (uint8_t p = 0; p < max_planes; p++) {
2386                      if (p < planes) {
2387                         add_bti_entry(map, set, b, i, p, binding);
2388                      } else {
2389                         add_null_bti_entry(map);
2390                      }
2391                   }
2392                }
2393             } else {
2394                for (unsigned i = 0; i < binding->array_size; i++)
2395                   add_dynamic_bti_entry(map, set, b, i, layout, binding);
2396             }
2397          }
2398          assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
2399       }
2400 
2401       if (binding_should_use_sampler_binding_table(&state, binding)) {
2402          if (map->sampler_count + array_size * array_multiplier > MAX_SAMPLER_TABLE_SIZE ||
2403              anv_descriptor_requires_bindless(pdevice, set_layout, binding) ||
2404              brw_shader_stage_requires_bindless_resources(shader->info.stage)) {
2405             /* If this descriptor doesn't fit in the binding table or if it
2406              * requires bindless for some reason, flag it as bindless.
2407              *
2408              * We also make large sampler arrays bindless because we can avoid
2409              * using indirect sends thanks to bindless samplers being packed
2410              * less tightly than the sampler table.
2411              */
2412             assert(anv_descriptor_supports_bindless(pdevice, set_layout, binding));
2413          } else {
2414             state.set[set].binding[b].sampler_offset = map->sampler_count;
2415             uint8_t max_planes = bti_multiplier(&state, set, b);
2416             for (unsigned i = 0; i < binding->array_size; i++) {
2417                for (uint8_t p = 0; p < max_planes; p++) {
2418                   add_sampler_entry(map, set, b, i, p, layout, binding);
2419                }
2420             }
2421          }
2422       }
2423 
2424       if (binding->data & ANV_DESCRIPTOR_INLINE_UNIFORM) {
2425          state.set[set].binding[b].surface_offset = state.set[set].desc_offset;
2426       }
2427 
2428 #if 0
2429       fprintf(stderr, "set=%u binding=%u surface_offset=0x%08x require_bindless=%u type=%s\n",
2430               set, b,
2431               state.set[set].binding[b].surface_offset,
2432               anv_descriptor_requires_bindless(pdevice, set_layout, binding),
2433               vk_DescriptorType_to_str(binding->type));
2434 #endif
2435    }
2436 
2437    /* Before we do the normal lowering, we look for any SSBO operations
2438     * that we can lower to the BTI model and lower them up-front.  The BTI
2439     * model can perform better than the A64 model for a couple of reasons:
2440     *
2441     *  1. 48-bit address calculations are potentially expensive and using
2442     *     the BTI model lets us simply compute 32-bit offsets and the
2443     *     hardware adds the 64-bit surface base address.
2444     *
2445     *  2. The BTI messages, because they use surface states, do bounds
2446     *     checking for us.  With the A64 model, we have to do our own
2447     *     bounds checking and this means wider pointers and extra
2448     *     calculations and branching in the shader.
2449     *
2450     * The solution to both of these is to convert things to the BTI model
2451     * opportunistically.  We need to do this as a pre-pass for two
2452     * reasons:
2453     *
2454     *  1. The BTI model requires nir_address_format_32bit_index_offset
2455     *     pointers which are not the same type as the pointers needed for
2456     *     the A64 model.  Because all our derefs are set up for the A64
2457     *     model (in case we have variable pointers), we have to crawl all
2458     *     the way back to the vulkan_resource_index intrinsic and build a
2459     *     completely fresh index+offset calculation.
2460     *
2461     *  2. Because the variable-pointers-capable lowering that we do as part
2462     *     of apply_pipeline_layout is destructive (it really has to
2463     *     be to handle variable pointers properly), we've lost the deref
2464     *     information by the time we get to the load/store/atomic
2465     *     intrinsics in that pass.
2466     */
2467    nir_shader_instructions_pass(shader, lower_direct_buffer_instr,
2468                                 nir_metadata_control_flow,
2469                                 &state);
2470 
2471    /* We just got rid of all the direct access.  Delete it so it's not in the
2472     * way when we do our indirect lowering.
2473     */
2474    nir_opt_dce(shader);
2475 
2476    nir_shader_instructions_pass(shader, apply_pipeline_layout,
2477                                 nir_metadata_none,
2478                                 &state);
2479 
2480    ralloc_free(mem_ctx);
2481 
2482    if (brw_shader_stage_is_bindless(shader->info.stage)) {
2483       assert(map->surface_count == 0);
2484       assert(map->sampler_count == 0);
2485    }
2486 
2487 #if 0
2488    fprintf(stderr, "bti:\n");
2489    for (unsigned i = 0; i < map->surface_count; i++) {
2490       fprintf(stderr, "  %03i: set=%03u binding=%06i index=%u plane=%u set_offset=0x%08x dyn_offset=0x%08x\n", i,
2491               map->surface_to_descriptor[i].set,
2492               map->surface_to_descriptor[i].binding,
2493               map->surface_to_descriptor[i].index,
2494               map->surface_to_descriptor[i].plane,
2495               map->surface_to_descriptor[i].set_offset,
2496               map->surface_to_descriptor[i].dynamic_offset_index);
2497    }
2498    fprintf(stderr, "sti:\n");
2499    for (unsigned i = 0; i < map->sampler_count; i++) {
2500       fprintf(stderr, "  %03i: set=%03u binding=%06i index=%u plane=%u\n", i,
2501               map->sampler_to_descriptor[i].set,
2502               map->sampler_to_descriptor[i].binding,
2503               map->sampler_to_descriptor[i].index,
2504               map->sampler_to_descriptor[i].plane);
2505    }
2506 #endif
2507 
2508    /* Now that we're done computing the surface and sampler portions of the
2509     * bind map, hash them.  This lets us quickly determine if the actual
2510     * mapping has changed and not just a no-op pipeline change.
2511     */
2512    _mesa_sha1_compute(map->surface_to_descriptor,
2513                       map->surface_count * sizeof(struct anv_pipeline_binding),
2514                       map->surface_sha1);
2515    _mesa_sha1_compute(map->sampler_to_descriptor,
2516                       map->sampler_count * sizeof(struct anv_pipeline_binding),
2517                       map->sampler_sha1);
2518 }
2519