1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "anv_nir.h"
25 #include "nir/nir_builder.h"
26 #include "compiler/brw_nir.h"
27 #include "util/mesa-sha1.h"
28 #include "util/set.h"
29 
30 #include "vk_enum_to_str.h"
31 
32 #include "genxml/genX_bits.h"
33 
34 /* Sampler tables don't actually have a maximum size but we pick one just so
35  * that we don't end up emitting too much state on-the-fly.
36  */
37 #define MAX_SAMPLER_TABLE_SIZE 128
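/* Sentinel stored in the surface_offset/sampler_offset fields below when a
 * binding did not get a binding table slot and is instead accessed through
 * the bindless heap (a reading of is_binding_bindless() further down).
 */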
38 #define BINDLESS_OFFSET        255
39 
40 #define sizeof_field(type, field) sizeof(((type *)0)->field)
41 
42 enum binding_property {
43    BINDING_PROPERTY_NORMAL   = BITFIELD_BIT(0),
44    BINDING_PROPERTY_PUSHABLE = BITFIELD_BIT(1),
45 };
46 
47 struct apply_pipeline_layout_state {
48    const struct anv_physical_device *pdevice;
49 
50    const struct anv_pipeline_sets_layout *layout;
51    nir_address_format desc_addr_format;
52    nir_address_format ssbo_addr_format;
53    nir_address_format ubo_addr_format;
54 
55    /* Place to flag lowered instructions so we don't lower them twice */
56    struct set *lowered_instrs;
57 
58    bool uses_constants;
59    bool has_dynamic_buffers;
60    bool has_independent_sets;
61    uint8_t constants_offset;
62    struct {
63       bool desc_buffer_used;
64       uint8_t desc_offset;
65 
66       struct {
67          uint8_t use_count;
68 
69          /* Binding table offset */
70          uint8_t surface_offset;
71 
72          /* Sampler table offset */
73          uint8_t sampler_offset;
74 
75          /* Properties of the binding */
76          enum binding_property properties;
77 
78          /* Each binding is identified with a unique identifier used for
79           * push computation.
80           */
81          uint32_t push_block;
82       } *binding;
83    } set[MAX_SETS];
84 };
85 
86 /* For a given binding, tells us how many binding table entries are needed per
87  * element.
88  */
89 static uint32_t
90 bti_multiplier(const struct apply_pipeline_layout_state *state,
91                uint32_t set, uint32_t binding)
92 {
93    const struct anv_descriptor_set_layout *set_layout =
94       state->layout->set[set].layout;
95    const struct anv_descriptor_set_binding_layout *bind_layout =
96       &set_layout->binding[binding];
97 
98    return bind_layout->max_plane_count == 0 ? 1 : bind_layout->max_plane_count;
99 }
100 
101 static nir_address_format
102 addr_format_for_desc_type(VkDescriptorType desc_type,
103                           struct apply_pipeline_layout_state *state)
104 {
105    switch (desc_type) {
106    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
107    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
108       return state->ssbo_addr_format;
109 
110    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
111    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
112       return state->ubo_addr_format;
113 
114    case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
115       return state->desc_addr_format;
116 
117    default:
118       unreachable("Unsupported descriptor type");
119    }
120 }
121 
122 static void
123 add_binding(struct apply_pipeline_layout_state *state,
124             uint32_t set, uint32_t binding)
125 {
126    const struct anv_descriptor_set_binding_layout *bind_layout =
127       &state->layout->set[set].layout->binding[binding];
128 
129    assert(set < state->layout->num_sets);
130    assert(binding < state->layout->set[set].layout->binding_count);
131 
132    if (state->set[set].binding[binding].use_count < UINT8_MAX)
133       state->set[set].binding[binding].use_count++;
134 
135    /* Only flag the descriptor buffer as used if there's actually data for
136     * this binding.  This lets us be lazy and call this function constantly
137     * without worrying about unnecessarily enabling the buffer.
138     */
139    if (bind_layout->descriptor_surface_stride)
140       state->set[set].desc_buffer_used = true;
141 
142    if (bind_layout->dynamic_offset_index >= 0)
143       state->has_dynamic_buffers = true;
144 
145    state->set[set].binding[binding].properties |= BINDING_PROPERTY_NORMAL;
146 }
147 
148 const VkDescriptorBindingFlags non_pushable_binding_flags =
149    VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT |
150    VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT |
151    VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT;
152 
153 static void
154 add_binding_type(struct apply_pipeline_layout_state *state,
155                  uint32_t set, uint32_t binding, VkDescriptorType type)
156 {
157    add_binding(state, set, binding);
158 
159    if ((state->layout->set[set].layout->binding[binding].flags &
160         non_pushable_binding_flags) == 0 &&
161        (state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
162         state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
163         state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ||
164         state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) &&
165        (type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
166         type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK))
167       state->set[set].binding[binding].properties |= BINDING_PROPERTY_PUSHABLE;
168 }
169 
170 static void
171 add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src)
172 {
173    nir_deref_instr *deref = nir_src_as_deref(src);
174    nir_variable *var = nir_deref_instr_get_variable(deref);
175    add_binding(state, var->data.descriptor_set, var->data.binding);
176 }
177 
178 static void
179 add_tex_src_binding(struct apply_pipeline_layout_state *state,
180                     nir_tex_instr *tex, nir_tex_src_type deref_src_type)
181 {
182    int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
183    if (deref_src_idx < 0)
184       return;
185 
186    add_deref_src_binding(state, tex->src[deref_src_idx].src);
187 }
188 
189 static bool
190 get_used_bindings(UNUSED nir_builder *_b, nir_instr *instr, void *_state)
191 {
192    struct apply_pipeline_layout_state *state = _state;
193 
194    switch (instr->type) {
195    case nir_instr_type_intrinsic: {
196       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
197       switch (intrin->intrinsic) {
198       case nir_intrinsic_vulkan_resource_index:
199          add_binding_type(state,
200                           nir_intrinsic_desc_set(intrin),
201                           nir_intrinsic_binding(intrin),
202                           nir_intrinsic_desc_type(intrin));
203          break;
204 
205       case nir_intrinsic_image_deref_load:
206       case nir_intrinsic_image_deref_store:
207       case nir_intrinsic_image_deref_atomic:
208       case nir_intrinsic_image_deref_atomic_swap:
209       case nir_intrinsic_image_deref_size:
210       case nir_intrinsic_image_deref_samples:
211       case nir_intrinsic_image_deref_load_param_intel:
212       case nir_intrinsic_image_deref_load_raw_intel:
213       case nir_intrinsic_image_deref_store_raw_intel:
214       case nir_intrinsic_image_deref_sparse_load:
215          add_deref_src_binding(state, intrin->src[0]);
216          break;
217 
218       case nir_intrinsic_load_constant:
219          state->uses_constants = true;
220          break;
221 
222       default:
223          break;
224       }
225       break;
226    }
227    case nir_instr_type_tex: {
228       nir_tex_instr *tex = nir_instr_as_tex(instr);
229       add_tex_src_binding(state, tex, nir_tex_src_texture_deref);
230       add_tex_src_binding(state, tex, nir_tex_src_sampler_deref);
231       break;
232    }
233    default:
234       break;
235    }
236 
237    return false;
238 }
239 
240 static nir_intrinsic_instr *
241 find_descriptor_for_index_src(nir_src src,
242                               struct apply_pipeline_layout_state *state)
243 {
244    nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);
245 
246    while (intrin && intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex)
247       intrin = nir_src_as_intrinsic(intrin->src[0]);
248 
249    if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
250       return NULL;
251 
252    return intrin;
253 }
254 
255 static bool
256 descriptor_has_bti(nir_intrinsic_instr *intrin,
257                    struct apply_pipeline_layout_state *state)
258 {
259    assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
260 
261    uint32_t set = nir_intrinsic_desc_set(intrin);
262    uint32_t binding = nir_intrinsic_binding(intrin);
263    const struct anv_descriptor_set_binding_layout *bind_layout =
264       &state->layout->set[set].layout->binding[binding];
265 
266    uint32_t surface_index;
267    if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM)
268       surface_index = state->set[set].desc_offset;
269    else
270       surface_index = state->set[set].binding[binding].surface_offset;
271 
272    /* Only lower to a BTI message if we have a valid binding table index. */
273    return surface_index < MAX_BINDING_TABLE_SIZE;
274 }
275 
276 static nir_address_format
277 descriptor_address_format(nir_intrinsic_instr *intrin,
278                           struct apply_pipeline_layout_state *state)
279 {
280    assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
281 
282    return addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);
283 }
284 
285 static nir_intrinsic_instr *
286 nir_deref_find_descriptor(nir_deref_instr *deref,
287                           struct apply_pipeline_layout_state *state)
288 {
289    while (1) {
290       /* Nothing we will use this on has a variable */
291       assert(deref->deref_type != nir_deref_type_var);
292 
293       nir_deref_instr *parent = nir_src_as_deref(deref->parent);
294       if (!parent)
295          break;
296 
297       deref = parent;
298    }
299    assert(deref->deref_type == nir_deref_type_cast);
300 
301    nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent);
302    if (!intrin || intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
303       return NULL;
304 
305    return find_descriptor_for_index_src(intrin->src[0], state);
306 }
307 
308 static nir_def *
309 build_load_descriptor_mem(nir_builder *b,
310                           nir_def *desc_addr, unsigned desc_offset,
311                           unsigned num_components, unsigned bit_size,
312                           const struct apply_pipeline_layout_state *state)
313 
314 {
315    switch (state->desc_addr_format) {
316    case nir_address_format_64bit_global_32bit_offset: {
317       nir_def *base_addr =
318          nir_pack_64_2x32(b, nir_trim_vector(b, desc_addr, 2));
319       nir_def *offset32 =
320          nir_iadd_imm(b, nir_channel(b, desc_addr, 3), desc_offset);
321 
322       return nir_load_global_constant_offset(b, num_components, bit_size,
323                                              base_addr, offset32,
324                                              .align_mul = 8,
325                                              .align_offset = desc_offset % 8);
326    }
327 
328    case nir_address_format_32bit_index_offset: {
329       nir_def *surface_index = nir_channel(b, desc_addr, 0);
330       nir_def *offset32 =
331          nir_iadd_imm(b, nir_channel(b, desc_addr, 1), desc_offset);
332 
333       return nir_load_ubo(b, num_components, bit_size,
334                           surface_index, offset32,
335                           .align_mul = 8,
336                           .align_offset = desc_offset % 8,
337                           .range_base = 0,
338                           .range = num_components * bit_size / 8);
339    }
340 
341    default:
342       unreachable("Unsupported address format");
343    }
344 }
345 
346 /* When using direct descriptors, we do not have a structure to read in
347  * memory like anv_address_range_descriptor where all the fields match the
348  * vec4 address format we need to generate for A64 messages. Instead we need
349  * to build the vec4 by parsing the RENDER_SURFACE_STATE structure. That is
350  * easy enough for the surface address, but a lot less fun for the size,
351  * where you have to combine 3 fields scattered over multiple dwords, add
352  * one to the total and check the surface type to deal with null descriptors.
353  *
354  * Fortunately we can reuse the Auxiliary surface address field to stash our
355  * buffer size and just load a vec4.
356  */
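/* A sketch of the value built by the helper below (derived from the loads it
 * performs, not from a separate spec): it produces the same vec4 shape as the
 * indirect descriptor path,
 *
 *    vec4(surface_base_addr_lo, surface_base_addr_hi, stashed_size, 0)
 *
 * with the size read back from the reused auxiliary surface address field.
 */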
357 static nir_def *
358 build_optimized_load_render_surface_state_address(nir_builder *b,
359                                                   nir_def *desc_addr,
360                                                   struct apply_pipeline_layout_state *state)
361 
362 {
363    const struct intel_device_info *devinfo = &state->pdevice->info;
364 
365    nir_def *surface_addr =
366       build_load_descriptor_mem(b, desc_addr,
367                                 RENDER_SURFACE_STATE_SurfaceBaseAddress_start(devinfo) / 8,
368                                 4, 32, state);
369    nir_def *addr_ldw = nir_channel(b, surface_addr, 0);
370    nir_def *addr_udw = nir_channel(b, surface_addr, 1);
371    nir_def *length = nir_channel(b, surface_addr, 3);
372 
373    return nir_vec4(b, addr_ldw, addr_udw, length, nir_imm_int(b, 0));
374 }
375 
376 /* When using direct descriptors, we do not have a structure to read in
377  * memory like anv_address_range_descriptor where all the fields match the
378  * vec4 address format we need to generate for A64 messages. Instead we need
379  * to build the vec4 by parsing the RENDER_SURFACE_STATE structure. That is
380  * easy enough for the surface address, but a lot less fun for the size.
381  */
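/* A compact reading of the reconstruction done below (a sketch, assuming the
 * SKL bit split quoted next to the Width/Height/Depth extraction):
 *
 *    length = ((depth << 21) | (height << 7) | width) + 1;
 *    if (surface_type == SURFTYPE_NULL)
 *       length = 0;
 */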
382 static nir_def *
383 build_non_optimized_load_render_surface_state_address(nir_builder *b,
384                                                       nir_def *desc_addr,
385                                                       struct apply_pipeline_layout_state *state)
386 
387 {
388    const struct intel_device_info *devinfo = &state->pdevice->info;
389 
390    assert(((RENDER_SURFACE_STATE_SurfaceBaseAddress_start(devinfo) +
391             RENDER_SURFACE_STATE_SurfaceBaseAddress_bits(devinfo) - 1) -
392            RENDER_SURFACE_STATE_Width_start(devinfo)) / 8 <= 32);
393 
394    nir_def *surface_addr =
395       build_load_descriptor_mem(b, desc_addr,
396                                 RENDER_SURFACE_STATE_SurfaceBaseAddress_start(devinfo) / 8,
397                                 DIV_ROUND_UP(RENDER_SURFACE_STATE_SurfaceBaseAddress_bits(devinfo), 32),
398                                 32, state);
399    nir_def *addr_ldw = nir_channel(b, surface_addr, 0);
400    nir_def *addr_udw = nir_channel(b, surface_addr, 1);
401 
402    /* Take all the RENDER_SURFACE_STATE fields from the beginning of the
403     * structure up to the Depth field.
404     */
405    const uint32_t type_sizes_dwords =
406       DIV_ROUND_UP(RENDER_SURFACE_STATE_Depth_start(devinfo) +
407                    RENDER_SURFACE_STATE_Depth_bits(devinfo), 32);
408    nir_def *type_sizes =
409       build_load_descriptor_mem(b, desc_addr, 0, type_sizes_dwords, 32, state);
410 
411    const unsigned width_start = RENDER_SURFACE_STATE_Width_start(devinfo);
412    /* SKL PRMs, Volume 2d: Command Reference: Structures, RENDER_SURFACE_STATE
413     *
414     *    Width:  "bits [6:0]   of the number of entries in the buffer - 1"
415     *    Height: "bits [20:7]  of the number of entries in the buffer - 1"
416     *    Depth:  "bits [31:21] of the number of entries in the buffer - 1"
417     */
418    const unsigned width_bits = 7;
419    nir_def *width =
420       nir_iand_imm(b,
421                    nir_ishr_imm(b,
422                                 nir_channel(b, type_sizes, width_start / 32),
423                                 width_start % 32),
424                    (1u << width_bits) - 1);
425 
426    const unsigned height_start = RENDER_SURFACE_STATE_Height_start(devinfo);
427    const unsigned height_bits = RENDER_SURFACE_STATE_Height_bits(devinfo);
428    nir_def *height =
429       nir_iand_imm(b,
430                    nir_ishr_imm(b,
431                                 nir_channel(b, type_sizes, height_start / 32),
432                                 height_start % 32),
433                    (1u << height_bits) - 1);
434 
435    const unsigned depth_start = RENDER_SURFACE_STATE_Depth_start(devinfo);
436    const unsigned depth_bits = RENDER_SURFACE_STATE_Depth_bits(devinfo);
437    nir_def *depth =
438       nir_iand_imm(b,
439                    nir_ishr_imm(b,
440                                 nir_channel(b, type_sizes, depth_start / 32),
441                                 depth_start % 32),
442                    (1u << depth_bits) - 1);
443 
444    nir_def *length = width;
445    length = nir_ior(b, length, nir_ishl_imm(b, height, width_bits));
446    length = nir_ior(b, length, nir_ishl_imm(b, depth, width_bits + height_bits));
447    length = nir_iadd_imm(b, length, 1);
448 
449    /* Check the surface type, if it's SURFTYPE_NULL, set the length of the
450     * buffer to 0.
451     */
452    const unsigned type_start = RENDER_SURFACE_STATE_SurfaceType_start(devinfo);
453    const unsigned type_dw = type_start / 32;
454    nir_def *type =
455       nir_iand_imm(b,
456                    nir_ishr_imm(b,
457                                 nir_channel(b, type_sizes, type_dw),
458                                 type_start % 32),
459                    (1u << RENDER_SURFACE_STATE_SurfaceType_bits(devinfo)) - 1);
460 
461    length = nir_bcsel(b,
462                       nir_ieq_imm(b, type, 7 /* SURFTYPE_NULL */),
463                       nir_imm_int(b, 0), length);
464 
465    return nir_vec4(b, addr_ldw, addr_udw, length, nir_imm_int(b, 0));
466 }
467 
468 static inline nir_def *
469 build_load_render_surface_state_address(nir_builder *b,
470                                         nir_def *desc_addr,
471                                         struct apply_pipeline_layout_state *state)
472 {
473    if (state->pdevice->isl_dev.buffer_length_in_aux_addr)
474       return build_optimized_load_render_surface_state_address(b, desc_addr, state);
475    return build_non_optimized_load_render_surface_state_address(b, desc_addr, state);
476 }
477 
478 /* Load the depth of a 3D storage image.
479  *
480  * Either by reading the indirect descriptor value, or reading the value from
481  * RENDER_SURFACE_STATE.
482  *
483  * This is necessary for VK_EXT_image_sliced_view_of_3d.
484  */
485 static nir_def *
486 build_load_storage_3d_image_depth(nir_builder *b,
487                                   nir_def *desc_addr,
488                                   nir_def *resinfo_depth,
489                                   struct apply_pipeline_layout_state *state)
490 
491 {
492    const struct intel_device_info *devinfo = &state->pdevice->info;
493 
494    if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) {
495       return build_load_descriptor_mem(
496          b, desc_addr,
497          offsetof(struct anv_storage_image_descriptor, image_depth),
498          1, 32, state);
499    } else {
500       nir_def *data = build_load_descriptor_mem(
501          b, desc_addr,
502          RENDER_SURFACE_STATE_RenderTargetViewExtent_start(devinfo) / 8,
503          1, 32, state);
504       nir_def *depth =
505          nir_ushr_imm(
506             b, data,
507             RENDER_SURFACE_STATE_RenderTargetViewExtent_start(devinfo) % 32);
508       depth = nir_iand_imm(
509          b, depth,
510          (1u << RENDER_SURFACE_STATE_RenderTargetViewExtent_bits(devinfo)) - 1);
511       depth = nir_iadd_imm(b, depth, 1);
512 
513       /* Return the minimum between the RESINFO value and the
514        * RENDER_SURFACE_STATE::RenderTargetViewExtent value.
515        *
516        * Both are expressed for the current view LOD, but in the case of a
517        * SURFTYPE_NULL, RESINFO will return the right value, while the -1
518        * value in RENDER_SURFACE_STATE should be ignored.
519        */
520       return nir_umin(b, resinfo_depth, depth);
521    }
522 }
523 /** Build a Vulkan resource index
524  *
525  * A "resource index" is the term used by our SPIR-V parser and the relevant
526  * NIR intrinsics for a reference into a descriptor set.  It acts much like a
527  * deref in NIR except that it accesses opaque descriptors instead of memory.
528  *
529  * Coming out of SPIR-V, both the resource indices (in the form of
530  * vulkan_resource_[re]index intrinsics) and the memory derefs (in the form
531  * of nir_deref_instr) use the same vector component/bit size.  The meaning
532  * of those values for memory derefs (nir_deref_instr) is given by the
533  * nir_address_format associated with the descriptor type.  For resource
534  * indices, it's an encoding entirely internal to ANV which describes, in some
535  * sense, the address of the descriptor.  Thanks to the NIR/SPIR-V rules, it
536  * must be packed into the same size SSA values as a memory address.  For this
537  * reason, the actual encoding may depend both on the address format for
538  * memory derefs and the descriptor address format.
539  *
540  * The load_vulkan_descriptor intrinsic exists to provide a transition point
541  * between these two forms of derefs: descriptor and memory.
542  */
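/* Rough layout of the resource index vec4 produced by build_res_index() and
 * consumed by unpack_res_index() below (inferred from the pack/unpack code):
 *
 *    .x : bits  7:0  dynamic offset index (0xff = no dynamic offset)
 *         bits 15:8  descriptor set index / descriptor buffer BTI
 *         bits 23:16 descriptor surface BTI
 *         bits 31:24 descriptor surface stride, in units of 8 bytes
 *    .y : descriptor offset of the binding in the descriptor set buffer
 *    .z : array_size - 1, used to clamp .w when unpacking
 *    .w : array index
 */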
543 static nir_def *
544 build_res_index(nir_builder *b,
545                 uint32_t set, uint32_t binding,
546                 nir_def *array_index,
547                 struct apply_pipeline_layout_state *state)
548 {
549    const struct anv_descriptor_set_binding_layout *bind_layout =
550       &state->layout->set[set].layout->binding[binding];
551 
552    uint32_t array_size = bind_layout->array_size;
553 
554    uint32_t set_idx;
555    switch (state->desc_addr_format) {
556    case nir_address_format_64bit_global_32bit_offset:
557       /* Descriptor set buffer accesses will go through A64 messages, so the
558        * index to get the descriptor set buffer address is located in the
559        * anv_push_constants::desc_surface_offsets and it's indexed by the set
560        * number.
561        */
562       set_idx = set;
563       break;
564 
565    case nir_address_format_32bit_index_offset:
566       /* Descriptor set buffer accesses will go through the binding table. The
567        * offset is the entry in the binding table.
568        */
569       assert(state->set[set].desc_offset < MAX_BINDING_TABLE_SIZE);
570       set_idx = state->set[set].desc_offset;
571       break;
572 
573    default:
574       unreachable("Unsupported address format");
575    }
576 
577    assert(bind_layout->dynamic_offset_index < MAX_DYNAMIC_BUFFERS);
578    nir_def *dynamic_offset_index;
579    if (bind_layout->dynamic_offset_index >= 0) {
580       if (state->has_independent_sets) {
581          nir_def *dynamic_offset_start =
582             nir_load_desc_set_dynamic_index_intel(b, nir_imm_int(b, set));
583          dynamic_offset_index =
584             nir_iadd_imm(b, dynamic_offset_start,
585                          bind_layout->dynamic_offset_index);
586       } else {
587          dynamic_offset_index =
588             nir_imm_int(b,
589                         state->layout->set[set].dynamic_offset_start +
590                         bind_layout->dynamic_offset_index);
591       }
592    } else {
593       dynamic_offset_index = nir_imm_int(b, 0xff); /* No dynamic offset */
594    }
595 
596    const uint32_t desc_bti = state->set[set].binding[binding].surface_offset;
597    assert(bind_layout->descriptor_surface_stride % 8 == 0);
598    const uint32_t desc_stride = bind_layout->descriptor_surface_stride / 8;
599 
600    nir_def *packed =
601       nir_ior_imm(b,
602                   dynamic_offset_index,
603                   (desc_stride << 24) |
604                   (desc_bti << 16)    |
605                   (set_idx << 8));
606 
607 
608    return nir_vec4(b, packed,
609                       nir_imm_int(b, bind_layout->descriptor_surface_offset),
610                       nir_imm_int(b, array_size - 1),
611                       array_index);
612 }
613 
614 struct res_index_defs {
615    nir_def *bti_idx;
616    nir_def *set_idx;
617    nir_def *dyn_offset_base;
618    nir_def *desc_offset_base;
619    nir_def *array_index;
620    nir_def *desc_stride;
621 };
622 
623 static struct res_index_defs
624 unpack_res_index(nir_builder *b, nir_def *index)
625 {
626    struct res_index_defs defs;
627 
628    nir_def *packed = nir_channel(b, index, 0);
629    defs.desc_stride =
630       nir_imul_imm(b, nir_extract_u8(b, packed, nir_imm_int(b, 3)), 8);
631    defs.bti_idx = nir_extract_u8(b, packed, nir_imm_int(b, 2));
632    defs.set_idx = nir_extract_u8(b, packed, nir_imm_int(b, 1));
633    defs.dyn_offset_base = nir_extract_u8(b, packed, nir_imm_int(b, 0));
634 
635    defs.desc_offset_base = nir_channel(b, index, 1);
636    defs.array_index = nir_umin(b, nir_channel(b, index, 2),
637                                   nir_channel(b, index, 3));
638 
639    return defs;
640 }
641 
642 /** Whether a surface is accessed through the bindless surface state heap */
643 static bool
644 is_binding_bindless(unsigned set, unsigned binding, bool sampler,
645                     const struct apply_pipeline_layout_state *state)
646 {
647    /* Has a binding table entry been allocated for this binding? */
648    if (sampler &&
649        state->set[set].binding[binding].sampler_offset != BINDLESS_OFFSET)
650       return false;
651    if (!sampler &&
652        state->set[set].binding[binding].surface_offset != BINDLESS_OFFSET)
653       return false;
654 
655    return true;
656 }
657 
658 /** Adjust a Vulkan resource index
659  *
660  * This is the equivalent of nir_deref_type_ptr_as_array for resource indices.
661  * For array descriptors, it allows us to adjust the array index.  Thanks to
662  * variable pointers, we cannot always fold this re-index operation into the
663  * vulkan_resource_index intrinsic and we have to do it based on nothing but
664  * the address format.
665  */
666 static nir_def *
667 build_res_reindex(nir_builder *b, nir_def *orig, nir_def *delta)
668 {
669    return nir_vec4(b, nir_channel(b, orig, 0),
670                       nir_channel(b, orig, 1),
671                       nir_channel(b, orig, 2),
672                       nir_iadd(b, nir_channel(b, orig, 3), delta));
673 }
674 
675 /** Get the address for a descriptor given its resource index
676  *
677  * Because of the re-indexing operations, we can't bounds check descriptor
678  * array access until we have the final index.  That means we end up doing the
679  * bounds check here, if needed.  See unpack_res_index() for more details.
680  *
681  * This function takes a desc_type which is used to decide whether the array
682  * index contributes to the descriptor offset (inline uniform blocks are never
683  * arrayed and have no stride).
684  */
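/* Shape of the addresses returned below, for reference (a sketch based on the
 * switch cases):
 *
 *    64bit_global_32bit_offset : vec4(set_base_lo, set_base_hi, ~0u, offset)
 *    32bit_index_offset        : vec2(set_index, offset)
 */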
685 static nir_def *
686 build_desc_addr_for_res_index(nir_builder *b,
687                               const VkDescriptorType desc_type,
688                               nir_def *index, nir_address_format addr_format,
689                               struct apply_pipeline_layout_state *state)
690 {
691    struct res_index_defs res = unpack_res_index(b, index);
692 
693    nir_def *desc_offset = res.desc_offset_base;
694    if (desc_type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
695       /* Compute the actual descriptor offset.  For inline uniform blocks,
696        * the array index is ignored as they are only allowed to be a single
697        * descriptor (not an array) and there is no concept of a "stride".
698        *
699        */
700       desc_offset =
701          nir_iadd(b, desc_offset, nir_imul(b, res.array_index, res.desc_stride));
702    }
703 
704    switch (addr_format) {
705    case nir_address_format_64bit_global_32bit_offset:
706    case nir_address_format_64bit_bounded_global: {
707       switch (state->desc_addr_format) {
708       case nir_address_format_64bit_global_32bit_offset: {
709          nir_def *base_addr =
710             nir_load_desc_set_address_intel(b, res.set_idx);
711          return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
712                             nir_unpack_64_2x32_split_y(b, base_addr),
713                             nir_imm_int(b, UINT32_MAX),
714                             desc_offset);
715       }
716 
717       case nir_address_format_32bit_index_offset:
718          return nir_vec2(b, res.set_idx, desc_offset);
719 
720       default:
721          unreachable("Unhandled address format");
722       }
723    }
724 
725    case nir_address_format_32bit_index_offset:
726       assert(desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK);
727       assert(state->desc_addr_format == nir_address_format_32bit_index_offset);
728       return nir_vec2(b, res.set_idx, desc_offset);
729 
730    default:
731       unreachable("Unhandled address format");
732    }
733 }
734 
735 static nir_def *
736 build_desc_addr_for_binding(nir_builder *b,
737                             unsigned set, unsigned binding,
738                             nir_def *array_index,
739                             const struct apply_pipeline_layout_state *state)
740 {
741    const struct anv_descriptor_set_binding_layout *bind_layout =
742       &state->layout->set[set].layout->binding[binding];
743 
744    switch (state->desc_addr_format) {
745    case nir_address_format_64bit_global_32bit_offset:
746    case nir_address_format_64bit_bounded_global: {
747       nir_def *set_addr = nir_load_desc_set_address_intel(b, nir_imm_int(b, set));
748       nir_def *desc_offset =
749          nir_iadd_imm(b,
750                       nir_imul_imm(b,
751                                    array_index,
752                                    bind_layout->descriptor_surface_stride),
753                       bind_layout->descriptor_surface_offset);
754 
755       return nir_vec4(b, nir_unpack_64_2x32_split_x(b, set_addr),
756                          nir_unpack_64_2x32_split_y(b, set_addr),
757                          nir_imm_int(b, UINT32_MAX),
758                          desc_offset);
759    }
760 
761    case nir_address_format_32bit_index_offset:
762       return nir_vec2(b,
763                       nir_imm_int(b, state->set[set].desc_offset),
764                       nir_iadd_imm(b,
765                                    nir_imul_imm(b,
766                                                 array_index,
767                                                 bind_layout->descriptor_surface_stride),
768                                    bind_layout->descriptor_surface_offset));
769 
770    default:
771       unreachable("Unhandled address format");
772    }
773 }
774 
775 static unsigned
776 binding_descriptor_offset(const struct apply_pipeline_layout_state *state,
777                           const struct anv_descriptor_set_binding_layout *bind_layout,
778                           bool sampler)
779 {
780    if (sampler &&
781        state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT)
782       return bind_layout->descriptor_sampler_offset;
783 
784    return bind_layout->descriptor_surface_offset;
785 }
786 
787 static unsigned
788 binding_descriptor_stride(const struct apply_pipeline_layout_state *state,
789                           const struct anv_descriptor_set_binding_layout *bind_layout,
790                           bool sampler)
791 {
792    if (sampler &&
793        state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT)
794       return bind_layout->descriptor_sampler_stride;
795 
796    return bind_layout->descriptor_surface_stride;
797 }
798 
799 static nir_def *
800 build_surface_index_for_binding(nir_builder *b,
801                                 unsigned set, unsigned binding,
802                                 nir_def *array_index,
803                                 unsigned plane,
804                                 bool non_uniform,
805                                 const struct apply_pipeline_layout_state *state)
806 {
807    const struct anv_descriptor_set_binding_layout *bind_layout =
808       &state->layout->set[set].layout->binding[binding];
809    const unsigned descriptor_offset =
810       binding_descriptor_offset(state, bind_layout, false /* sampler */);
811    const unsigned descriptor_stride =
812       binding_descriptor_stride(state, bind_layout, false /* sampler */);
813    const bool is_bindless =
814       is_binding_bindless(set, binding, false /* sampler */, state);
815 
816    nir_def *set_offset, *surface_index;
817    if (is_bindless) {
818       if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) {
819          set_offset = nir_imm_int(b, 0xdeaddead);
820 
821          nir_def *desc_addr =
822             build_desc_addr_for_binding(b, set, binding, array_index, state);
823 
824          surface_index =
825             build_load_descriptor_mem(b, desc_addr, 0, 1, 32, state);
826       } else {
827          set_offset =
828             nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0),
829                                    .base = offsetof(struct anv_push_constants,
830                                                     desc_surface_offsets[set]),
831                                    .range = sizeof_field(struct anv_push_constants,
832                                                          desc_surface_offsets[set]));
833 
834          /* With bindless, indices are offsets into the descriptor buffer */
835          surface_index =
836             nir_iadd_imm(b,
837                          nir_imul_imm(b, array_index, descriptor_stride),
838                          descriptor_offset);
839          if (plane != 0) {
840             assert(plane < bind_layout->max_plane_count);
841             surface_index = nir_iadd_imm(b, surface_index,
842                                          plane * (descriptor_stride /
843                                                   bind_layout->max_plane_count));
844          }
845 
846          assert(descriptor_offset % 64 == 0);
847          assert(descriptor_stride % 64 == 0);
848       }
849    } else {
850       /* Unused */
851       set_offset = nir_imm_int(b, 0xdeaddead);
852 
853       unsigned bti_stride = bti_multiplier(state, set, binding);
854       assert(bti_stride >= 1);
855 
856       /* For Ycbcr descriptors, add the plane offset */
857       unsigned element_index = plane;
858 
859       /* With the binding table, it's an index in the table */
860       surface_index =
861          nir_iadd_imm(b, nir_imul_imm(b, array_index, bti_stride),
862                          state->set[set].binding[binding].surface_offset + element_index);
863       assert(state->set[set].binding[binding].surface_offset < MAX_BINDING_TABLE_SIZE);
864    }
865 
866    return nir_resource_intel(b,
867                              set_offset,
868                              surface_index,
869                              array_index,
870                              .desc_set = set,
871                              .binding = binding,
872                              .resource_block_intel = state->set[set].binding[binding].push_block,
873                              .resource_access_intel =
874                                 (is_bindless ? nir_resource_intel_bindless : 0) |
875                                 (non_uniform ? nir_resource_intel_non_uniform : 0) |
876                                 ((state->set[set].binding[binding].properties &
877                                   BINDING_PROPERTY_PUSHABLE) ? nir_resource_intel_pushable : 0));
878 }
879 
880 static nir_def *
881 build_sampler_handle_for_binding(nir_builder *b,
882                                  unsigned set, unsigned binding,
883                                  nir_def *array_index,
884                                  unsigned plane,
885                                  bool non_uniform,
886                                  const struct apply_pipeline_layout_state *state)
887 {
888    const struct anv_descriptor_set_binding_layout *bind_layout =
889       &state->layout->set[set].layout->binding[binding];
890    const unsigned descriptor_offset =
891       binding_descriptor_offset(state, bind_layout, true /* sampler */);
892    const unsigned descriptor_stride =
893       binding_descriptor_stride(state, bind_layout, true /* sampler */);
894    const bool is_bindless =
895       is_binding_bindless(set, binding, true /* sampler */, state);
896    nir_def *set_offset, *sampler_index;
897 
898    if (is_bindless) {
899       if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) {
900          set_offset = nir_imm_int(b, 0xdeaddead);
901 
902          nir_def *desc_addr =
903             build_desc_addr_for_binding(b, set, binding, array_index, state);
904 
905          /* This is anv_sampled_image_descriptor, the sampler handle is always
906           * in component 1.
907           */
908          nir_def *desc_data =
909             build_load_descriptor_mem(b, desc_addr, 0, 2, 32, state);
910 
911          sampler_index = nir_channel(b, desc_data, 1);
912       } else {
913          set_offset =
914             nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0),
915                                    .base = offsetof(struct anv_push_constants,
916                                                     desc_sampler_offsets[set]),
917                                    .range = sizeof_field(struct anv_push_constants,
918                                                          desc_sampler_offsets[set]));
919 
920          uint32_t base_offset = descriptor_offset;
921 
922          /* The SAMPLER_STATE can only be located at a 64-byte offset in the
923           * combined image/sampler case. Combined image/sampler cannot be
924           * used with mutable descriptor types.
925           */
926          if (bind_layout->data & ANV_DESCRIPTOR_SURFACE_SAMPLER)
927             base_offset += ANV_SURFACE_STATE_SIZE;
928 
929          if (plane != 0) {
930             assert(plane < bind_layout->max_plane_count);
931             base_offset += plane * (descriptor_stride /
932                                     bind_layout->max_plane_count);
933          }
934 
935          sampler_index =
936             nir_iadd_imm(b,
937                          nir_imul_imm(b, array_index, descriptor_stride),
938                          base_offset);
939       }
940    } else {
941       /* Unused */
942       set_offset = nir_imm_int(b, 0xdeaddead);
943 
944       sampler_index =
945          nir_iadd_imm(b, array_index,
946                       state->set[set].binding[binding].sampler_offset + plane);
947    }
948 
949    return nir_resource_intel(b, set_offset, sampler_index, array_index,
950                              .desc_set = set,
951                              .binding = binding,
952                              .resource_access_intel =
953                                 (is_bindless ? nir_resource_intel_bindless : 0) |
954                                 (non_uniform ? nir_resource_intel_non_uniform : 0) |
955                                 nir_resource_intel_sampler);
956 }
957 
958 static nir_def *
959 build_buffer_dynamic_offset_for_res_index(nir_builder *b,
960                                           nir_def *dyn_offset_base,
961                                           nir_def *array_index,
962                                           struct apply_pipeline_layout_state *state)
963 {
964    nir_def *dyn_offset_idx = nir_iadd(b, dyn_offset_base, array_index);
965 
966    nir_def *dyn_load =
967       nir_load_push_constant(b, 1, 32, nir_imul_imm(b, dyn_offset_idx, 4),
968                              .base = offsetof(struct anv_push_constants, dynamic_offsets),
969                              .range = sizeof_field(struct anv_push_constants, dynamic_offsets));
970 
971    return nir_bcsel(b, nir_ieq_imm(b, dyn_offset_base, 0xff),
972                        nir_imm_int(b, 0), dyn_load);
973 }
974 
975 /** Convert a Vulkan resource index into a buffer address
976  *
977  * In some cases, this does a memory load from the descriptor set and, in
978  * others, it simply converts from one form to another.
979  *
980  * See build_res_index for details about each resource index format.
981  */
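/* For the A64 address formats, the result of the helpers below is laid out
 * like struct anv_address_range_descriptor:
 *
 *    vec4(buffer_base_addr_lo, buffer_base_addr_hi, buffer_size, 0)
 *
 * (a sketch of the common shape; the 32bit_index_offset path instead returns
 * a vec2 of binding table index and offset).
 */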
982 static nir_def *
983 build_indirect_buffer_addr_for_res_index(nir_builder *b,
984                                          const VkDescriptorType desc_type,
985                                          nir_def *res_index,
986                                          nir_address_format addr_format,
987                                          struct apply_pipeline_layout_state *state)
988 {
989    struct res_index_defs res = unpack_res_index(b, res_index);
990 
991    if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
992       assert(addr_format == state->desc_addr_format);
993       return build_desc_addr_for_res_index(b, desc_type, res_index,
994                                            addr_format, state);
995    } else if (addr_format == nir_address_format_32bit_index_offset) {
996       return nir_vec2(b, nir_iadd(b, res.bti_idx, res.array_index),
997                          nir_imm_int(b, 0));
998    }
999 
1000    nir_def *desc_addr =
1001       build_desc_addr_for_res_index(b, desc_type, res_index,
1002                                     addr_format, state);
1003 
1004    nir_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 4, 32, state);
1005 
1006    if (state->has_dynamic_buffers) {
1007       /* This shader has dynamic offsets and we have no way of knowing
1008        * (aside from the dynamic offset base index) if this buffer has a
1009        * dynamic offset.
1010        */
1011       nir_def *dyn_offset_idx =
1012          nir_iadd(b, res.dyn_offset_base, res.array_index);
1013 
1014       nir_def *dyn_load =
1015          nir_load_push_constant(b, 1, 32, nir_imul_imm(b, dyn_offset_idx, 4),
1016                                 .base = offsetof(struct anv_push_constants, dynamic_offsets),
1017                                 .range = MAX_DYNAMIC_BUFFERS * 4);
1018 
1019       nir_def *dynamic_offset =
1020          nir_bcsel(b, nir_ieq_imm(b, res.dyn_offset_base, 0xff),
1021                       nir_imm_int(b, 0), dyn_load);
1022 
1023       /* The dynamic offset gets added to the base pointer so that we
1024        * have a sliding window range.
1025        */
1026       nir_def *base_ptr =
1027          nir_pack_64_2x32(b, nir_trim_vector(b, desc, 2));
1028       base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
1029       desc = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
1030                          nir_unpack_64_2x32_split_y(b, base_ptr),
1031                          nir_channel(b, desc, 2),
1032                          nir_channel(b, desc, 3));
1033    }
1034 
1035    /* The last element of the vec4 is always zero.
1036     *
1037     * See also struct anv_address_range_descriptor
1038     */
1039    return nir_vec4(b, nir_channel(b, desc, 0),
1040                       nir_channel(b, desc, 1),
1041                       nir_channel(b, desc, 2),
1042                       nir_imm_int(b, 0));
1043 }
1044 
1045 static nir_def *
1046 build_direct_buffer_addr_for_res_index(nir_builder *b,
1047                                        const VkDescriptorType desc_type,
1048                                        nir_def *res_index,
1049                                        nir_address_format addr_format,
1050                                        struct apply_pipeline_layout_state *state)
1051 {
1052    if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
1053       assert(addr_format == state->desc_addr_format);
1054       return build_desc_addr_for_res_index(b, desc_type, res_index,
1055                                            addr_format, state);
1056    } else if (addr_format == nir_address_format_32bit_index_offset) {
1057       struct res_index_defs res = unpack_res_index(b, res_index);
1058 
1059       return nir_vec2(b, nir_iadd(b, res.desc_offset_base,
1060                                   nir_imul(b, res.array_index, res.desc_stride)),
1061                       nir_imm_int(b, 0));
1062    }
1063 
1064    nir_def *desc_addr =
1065       build_desc_addr_for_res_index(b, desc_type, res_index,
1066                                     addr_format, state);
1067 
1068    nir_def *addr =
1069       build_load_render_surface_state_address(b, desc_addr, state);
1070 
1071    if (state->has_dynamic_buffers) {
1072       struct res_index_defs res = unpack_res_index(b, res_index);
1073 
1074       /* This shader has dynamic offsets and we have no way of knowing
1075        * (aside from the dynamic offset base index) if this buffer has a dynamic
1076        * offset.
1077        */
1078       nir_def *dynamic_offset =
1079          build_buffer_dynamic_offset_for_res_index(
1080             b, res.dyn_offset_base, res.array_index, state);
1081 
1082       /* The dynamic offset gets added to the base pointer so that we
1083        * have a sliding window range.
1084        */
1085       nir_def *base_ptr =
1086          nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
1087       base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
1088       addr = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
1089                          nir_unpack_64_2x32_split_y(b, base_ptr),
1090                          nir_channel(b, addr, 2),
1091                          nir_channel(b, addr, 3));
1092    }
1093 
1094    /* The last element of the vec4 is always zero.
1095     *
1096     * See also struct anv_address_range_descriptor
1097     */
1098    return nir_vec4(b, nir_channel(b, addr, 0),
1099                       nir_channel(b, addr, 1),
1100                       nir_channel(b, addr, 2),
1101                       nir_imm_int(b, 0));
1102 }
1103 
1104 static nir_def *
1105 build_buffer_addr_for_res_index(nir_builder *b,
1106                                 const VkDescriptorType desc_type,
1107                                 nir_def *res_index,
1108                                 nir_address_format addr_format,
1109                                 struct apply_pipeline_layout_state *state)
1110 {
1111    if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT)
1112       return build_indirect_buffer_addr_for_res_index(b, desc_type, res_index, addr_format, state);
1113    else
1114       return build_direct_buffer_addr_for_res_index(b, desc_type, res_index, addr_format, state);
1115 }
1116 
1117 static nir_def *
1118 build_buffer_addr_for_binding(nir_builder *b,
1119                               const VkDescriptorType desc_type,
1120                               unsigned set,
1121                               unsigned binding,
1122                               nir_def *res_index,
1123                               nir_address_format addr_format,
1124                               struct apply_pipeline_layout_state *state)
1125 {
1126    if (addr_format != nir_address_format_32bit_index_offset)
1127       return build_buffer_addr_for_res_index(b, desc_type, res_index, addr_format, state);
1128 
1129    if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
1130       const struct anv_descriptor_set_binding_layout *bind_layout =
1131          &state->layout->set[set].layout->binding[binding];
1132       return nir_vec2(b,
1133                       nir_imm_int(b, state->set[set].desc_offset),
1134                       nir_imm_int(b, bind_layout->descriptor_surface_offset));
1135    }
1136 
1137    struct res_index_defs res = unpack_res_index(b, res_index);
1138 
1139    return nir_vec2(b,
1140                    build_surface_index_for_binding(b, set, binding, res.array_index,
1141                                                    0 /* plane */,
1142                                                    false /* non_uniform */,
1143                                                    state),
1144                    nir_imm_int(b, 0));
1145 }
1146 
1147 /** Loads descriptor memory for a variable-based deref chain
1148  *
1149  * The deref chain has to terminate at a variable with a descriptor_set and
1150  * binding set.  This is used for images, textures, and samplers.
1151  */
1152 static nir_def *
1153 build_load_var_deref_surface_handle(nir_builder *b, nir_deref_instr *deref,
1154                                     bool non_uniform,
1155                                     bool *out_is_bindless,
1156                                     struct apply_pipeline_layout_state *state)
1157 {
1158    nir_variable *var = nir_deref_instr_get_variable(deref);
1159 
1160    const uint32_t set = var->data.descriptor_set;
1161    const uint32_t binding = var->data.binding;
1162 
1163    *out_is_bindless =
1164       is_binding_bindless(set, binding, false /* sampler */, state);
1165 
1166    nir_def *array_index;
1167    if (deref->deref_type != nir_deref_type_var) {
1168       assert(deref->deref_type == nir_deref_type_array);
1169       assert(nir_deref_instr_parent(deref)->deref_type == nir_deref_type_var);
1170       array_index = deref->arr.index.ssa;
1171    } else {
1172       array_index = nir_imm_int(b, 0);
1173    }
1174 
1175    return build_surface_index_for_binding(b, set, binding, array_index,
1176                                           0 /* plane */, non_uniform, state);
1177 }
1178 
1179 /** A recursive form of build_res_index()
1180  *
1181  * This recursively walks a resource [re]index chain and builds the resource
1182  * index.  It places the new code with the resource [re]index operation in the
1183  * hopes of better CSE.  This means the cursor is not where you left it when
1184  * this function returns.
1185  */
1186 static nir_def *
1187 build_res_index_for_chain(nir_builder *b, nir_intrinsic_instr *intrin,
1188                           nir_address_format addr_format,
1189                           uint32_t *set, uint32_t *binding,
1190                           struct apply_pipeline_layout_state *state)
1191 {
1192    if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) {
1193       b->cursor = nir_before_instr(&intrin->instr);
1194       *set = nir_intrinsic_desc_set(intrin);
1195       *binding = nir_intrinsic_binding(intrin);
1196       return build_res_index(b, *set, *binding, intrin->src[0].ssa, state);
1197    } else {
1198       assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex);
1199       nir_intrinsic_instr *parent = nir_src_as_intrinsic(intrin->src[0]);
1200       nir_def *index =
1201          build_res_index_for_chain(b, parent, addr_format,
1202                                    set, binding, state);
1203 
1204       b->cursor = nir_before_instr(&intrin->instr);
1205 
1206       return build_res_reindex(b, index, intrin->src[1].ssa);
1207    }
1208 }
1209 
1210 /** Builds a buffer address for a given vulkan [re]index intrinsic
1211  *
1212  * The cursor is not where you left it when this function returns.
1213  */
1214 static nir_def *
1215 build_buffer_addr_for_idx_intrin(nir_builder *b,
1216                                  nir_intrinsic_instr *idx_intrin,
1217                                  nir_address_format addr_format,
1218                                  struct apply_pipeline_layout_state *state)
1219 {
1220    uint32_t set = UINT32_MAX, binding = UINT32_MAX;
1221    nir_def *res_index =
1222       build_res_index_for_chain(b, idx_intrin, addr_format,
1223                                 &set, &binding, state);
1224 
1225    const struct anv_descriptor_set_binding_layout *bind_layout =
1226       &state->layout->set[set].layout->binding[binding];
1227 
1228    return build_buffer_addr_for_binding(b, bind_layout->type,
1229                                         set, binding, res_index,
1230                                         addr_format, state);
1231 }
1232 
1233 /** Builds a buffer address for a deref chain
1234  *
1235  * This assumes that you can chase the chain all the way back to the original
1236  * vulkan_resource_index intrinsic.
1237  *
1238  * The cursor is not where you left it when this function returns.
1239  */
1240 static nir_def *
1241 build_buffer_addr_for_deref(nir_builder *b, nir_deref_instr *deref,
1242                             nir_address_format addr_format,
1243                             struct apply_pipeline_layout_state *state)
1244 {
1245    nir_deref_instr *parent = nir_deref_instr_parent(deref);
1246    if (parent) {
1247       nir_def *addr =
1248          build_buffer_addr_for_deref(b, parent, addr_format, state);
1249 
1250       b->cursor = nir_before_instr(&deref->instr);
1251       return nir_explicit_io_address_from_deref(b, deref, addr, addr_format);
1252    }
1253 
1254    nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
1255    assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);
1256 
1257    nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);
1258 
1259    b->cursor = nir_before_instr(&deref->instr);
1260 
1261    return build_buffer_addr_for_idx_intrin(b, idx_intrin, addr_format, state);
1262 }
1263 
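/** Tries to lower a UBO/SSBO deref access to the binding table (BTI) model
 *
 * Returns false if the access has to stay on the A64 path, e.g. for
 * non-uniform accesses, some 64-bit SSBO atomics, or bindings without a
 * binding table entry.
 */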
1264 static bool
1265 try_lower_direct_buffer_intrinsic(nir_builder *b,
1266                                   nir_intrinsic_instr *intrin, bool is_atomic,
1267                                   struct apply_pipeline_layout_state *state)
1268 {
1269    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1270    if (!nir_deref_mode_is_one_of(deref, nir_var_mem_ubo | nir_var_mem_ssbo))
1271       return false;
1272 
1273    nir_intrinsic_instr *desc = nir_deref_find_descriptor(deref, state);
1274    if (desc == NULL) {
1275       /* We should always be able to find the descriptor for UBO access. */
1276       assert(nir_deref_mode_is_one_of(deref, nir_var_mem_ssbo));
1277       return false;
1278    }
1279 
1280    const unsigned set = nir_intrinsic_desc_set(desc);
1281    const unsigned binding = nir_intrinsic_binding(desc);
1282 
1283    const struct anv_descriptor_set_binding_layout *bind_layout =
1284       &state->layout->set[set].layout->binding[binding];
1285 
1286    nir_address_format addr_format = descriptor_address_format(desc, state);
1287 
1288    /* Although we could lower non-uniform binding table accesses with
1289     * nir_opt_non_uniform_access, we might as well use an A64 message and
1290     * avoid the loops inserted by that lowering pass.
1291     */
1292    if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM)
1293       return false;
1294 
1295    if (nir_deref_mode_is(deref, nir_var_mem_ssbo)) {
1296       /* 64-bit atomics only support A64 messages so we can't lower them to
1297        * the index+offset model.
1298        */
1299       if (is_atomic && intrin->def.bit_size == 64 &&
1300           !state->pdevice->info.has_lsc)
1301          return false;
1302 
1303       /* If we don't have a BTI for this binding and we're using indirect
1304        * descriptors, we'll use A64 messages. This is handled in the main
1305        * lowering path.
1306        */
1307       if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT &&
1308           !descriptor_has_bti(desc, state))
1309          return false;
1310 
1311       /* Rewrite to 32bit_index_offset whenever we can */
1312       addr_format = nir_address_format_32bit_index_offset;
1313    } else {
1314       assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
1315 
1316       /* If we don't have a BTI for this binding and we're using indirect
1317        * descriptors, we'll use A64 messages. This is handled in the main
1318        * lowering path.
1319        *
1320        * We make an exception for uniform blocks which are built from the
1321        * descriptor set base address + offset. There is no indirect data to
1322        * fetch.
1323        */
1324       if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT &&
1325           bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK &&
1326           !descriptor_has_bti(desc, state))
1327          return false;
1328 
1329       /* If this is an inline uniform and the shader stage is bindless, we
1330        * can't switch to 32bit_index_offset.
1331        */
1332       if (bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ||
1333           !brw_shader_stage_requires_bindless_resources(b->shader->info.stage))
1334          addr_format = nir_address_format_32bit_index_offset;
1335    }
1336 
1337    /* If a dynamic buffer has not been assigned a binding table entry, we
1338     * bail here.
1339     */
1340    if ((bind_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
1341         bind_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) &&
1342        !descriptor_has_bti(desc, state))
1343       return false;
1344 
1345    nir_def *addr =
1346       build_buffer_addr_for_deref(b, deref, addr_format, state);
1347 
1348    b->cursor = nir_before_instr(&intrin->instr);
1349    nir_lower_explicit_io_instr(b, intrin, addr, addr_format);
1350 
1351    return true;
1352 }
1353 
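/** Lowers load_vulkan_descriptor for acceleration structures
 *
 * The descriptor load is replaced with a single 64-bit load from descriptor
 * memory (acceleration structure descriptors are always uint64_t).
 */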
1354 static bool
1355 lower_load_accel_struct_desc(nir_builder *b,
1356                              nir_intrinsic_instr *load_desc,
1357                              struct apply_pipeline_layout_state *state)
1358 {
1359    assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);
1360 
1361    nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);
1362 
1363    /* It doesn't really matter what address format we choose as
1364     * everything will constant-fold nicely.  Choose one that uses the
1365     * actual descriptor buffer.
1366     */
1367    const nir_address_format addr_format =
1368       nir_address_format_64bit_bounded_global;
1369 
1370    uint32_t set = UINT32_MAX, binding = UINT32_MAX;
1371    nir_def *res_index =
1372       build_res_index_for_chain(b, idx_intrin, addr_format,
1373                                 &set, &binding, state);
1374 
1375    b->cursor = nir_before_instr(&load_desc->instr);
1376 
1377    struct res_index_defs res = unpack_res_index(b, res_index);
1378    nir_def *desc_addr =
1379       build_desc_addr_for_binding(b, set, binding, res.array_index, state);
1380 
1381    /* Acceleration structure descriptors are always uint64_t */
1382    nir_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 1, 64, state);
1383 
1384    assert(load_desc->def.bit_size == 64);
1385    assert(load_desc->def.num_components == 1);
1386    nir_def_rewrite_uses(&load_desc->def, desc);
1387    nir_instr_remove(&load_desc->instr);
1388 
1389    return true;
1390 }
1391 
1392 static bool
1393 lower_direct_buffer_instr(nir_builder *b, nir_instr *instr, void *_state)
1394 {
1395    struct apply_pipeline_layout_state *state = _state;
1396 
1397    if (instr->type != nir_instr_type_intrinsic)
1398       return false;
1399 
1400    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1401    switch (intrin->intrinsic) {
1402    case nir_intrinsic_load_deref:
1403    case nir_intrinsic_store_deref:
1404       return try_lower_direct_buffer_intrinsic(b, intrin, false, state);
1405 
1406    case nir_intrinsic_deref_atomic:
1407    case nir_intrinsic_deref_atomic_swap:
1408       return try_lower_direct_buffer_intrinsic(b, intrin, true, state);
1409 
1410    case nir_intrinsic_get_ssbo_size: {
1411       /* The get_ssbo_size intrinsic always just takes an
1412        * index/reindex intrinsic.
1413        */
1414       nir_intrinsic_instr *idx_intrin =
1415          find_descriptor_for_index_src(intrin->src[0], state);
1416       if (idx_intrin == NULL)
1417          return false;
1418 
1419       /* We just checked that this is a BTI descriptor */
1420       const nir_address_format addr_format =
1421          nir_address_format_32bit_index_offset;
1422 
1423       b->cursor = nir_before_instr(&intrin->instr);
1424 
1425       uint32_t set = UINT32_MAX, binding = UINT32_MAX;
1426       nir_def *res_index =
1427          build_res_index_for_chain(b, idx_intrin, addr_format,
1428                                    &set, &binding, state);
1429 
1430       bool non_uniform = nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM;
1431 
1432       nir_def *surface_index =
1433          build_surface_index_for_binding(b, set, binding,
1434                                          nir_channel(b, res_index, 3),
1435                                          0 /* plane */,
1436                                          non_uniform,
1437                                          state);
1438 
1439       nir_src_rewrite(&intrin->src[0], surface_index);
1440       _mesa_set_add(state->lowered_instrs, intrin);
1441       return true;
1442    }
1443 
1444    case nir_intrinsic_load_vulkan_descriptor:
1445       if (nir_intrinsic_desc_type(intrin) ==
1446           VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
1447          return lower_load_accel_struct_desc(b, intrin, state);
1448       return false;
1449 
1450    default:
1451       return false;
1452    }
1453 }
1454 
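/** Lowers nir_intrinsic_vulkan_resource_index
 *
 * The intrinsic is replaced with the packed resource index vector built by
 * build_res_index() from its descriptor set, binding and array index.
 */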
1455 static bool
1456 lower_res_index_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
1457                           struct apply_pipeline_layout_state *state)
1458 {
1459    b->cursor = nir_before_instr(&intrin->instr);
1460 
1461    nir_def *index =
1462       build_res_index(b, nir_intrinsic_desc_set(intrin),
1463                          nir_intrinsic_binding(intrin),
1464                          intrin->src[0].ssa,
1465                          state);
1466 
1467    assert(intrin->def.bit_size == index->bit_size);
1468    assert(intrin->def.num_components == index->num_components);
1469    nir_def_rewrite_uses(&intrin->def, index);
1470    nir_instr_remove(&intrin->instr);
1471 
1472    return true;
1473 }
1474 
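/** Lowers nir_intrinsic_vulkan_resource_reindex
 *
 * The intrinsic is replaced with the result of build_res_reindex(), which
 * combines the parent resource index with the additional array index.
 */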
1475 static bool
1476 lower_res_reindex_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
1477                             struct apply_pipeline_layout_state *state)
1478 {
1479    b->cursor = nir_before_instr(&intrin->instr);
1480 
1481    nir_def *index =
1482       build_res_reindex(b, intrin->src[0].ssa,
1483                            intrin->src[1].ssa);
1484 
1485    assert(intrin->def.bit_size == index->bit_size);
1486    assert(intrin->def.num_components == index->num_components);
1487    nir_def_rewrite_uses(&intrin->def, index);
1488    nir_instr_remove(&intrin->instr);
1489 
1490    return true;
1491 }
1492 
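/** Lowers nir_intrinsic_load_vulkan_descriptor
 *
 * The descriptor load is replaced with a buffer address, in the address
 * format selected for the descriptor type, built from the resource index
 * source.
 */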
1493 static bool
1494 lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
1495                              struct apply_pipeline_layout_state *state)
1496 {
1497    b->cursor = nir_before_instr(&intrin->instr);
1498 
1499    const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
1500    nir_address_format addr_format = addr_format_for_desc_type(desc_type, state);
1501 
1502    nir_def *desc =
1503       build_buffer_addr_for_res_index(b,
1504                                       desc_type, intrin->src[0].ssa,
1505                                       addr_format, state);
1506 
1507    assert(intrin->def.bit_size == desc->bit_size);
1508    assert(intrin->def.num_components == desc->num_components);
1509    nir_def_rewrite_uses(&intrin->def, desc);
1510    nir_instr_remove(&intrin->instr);
1511 
1512    return true;
1513 }
1514 
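/** Lowers nir_intrinsic_get_ssbo_size
 *
 * The size is read either from the anv_address_range_descriptor in
 * descriptor memory (indirect descriptor layouts) or from the
 * RENDER_SURFACE_STATE.  Intrinsics already rewritten to a binding table
 * surface index by lower_direct_buffer_instr() are skipped via the
 * lowered_instrs set.
 */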
1515 static bool
1516 lower_get_ssbo_size(nir_builder *b, nir_intrinsic_instr *intrin,
1517                     struct apply_pipeline_layout_state *state)
1518 {
1519    if (_mesa_set_search(state->lowered_instrs, intrin))
1520       return false;
1521 
1522    b->cursor = nir_before_instr(&intrin->instr);
1523 
1524    const nir_address_format addr_format =
1525       nir_address_format_64bit_bounded_global;
1526 
1527    nir_def *desc_addr =
1528       nir_build_addr_iadd_imm(
1529          b,
1530          build_desc_addr_for_res_index(b,
1531                                        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1532                                        intrin->src[0].ssa,
1533                                        addr_format, state),
1534          addr_format,
1535          nir_var_mem_ssbo,
1536          state->pdevice->isl_dev.ss.size);
1537 
1538    nir_def *desc_range;
1539    if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) {
1540       /* Load the anv_address_range_descriptor */
1541       desc_range =
1542          build_load_descriptor_mem(b, desc_addr, 0, 4, 32, state);
1543    } else {
1544       /* Build a vec4 similar to anv_address_range_descriptor using the
1545        * RENDER_SURFACE_STATE.
1546        */
1547       desc_range =
1548          build_load_render_surface_state_address(b, desc_addr, state);
1549    }
1550 
1551    nir_def *size = nir_channel(b, desc_range, 2);
1552    nir_def_rewrite_uses(&intrin->def, size);
1553    nir_instr_remove(&intrin->instr);
1554 
1555    return true;
1556 }
1557 
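/** Lowers an image deref intrinsic
 *
 * The image deref source is replaced with either a binding table surface
 * index or a bindless handle built from the variable's set/binding.
 */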
1558 static bool
1559 lower_image_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
1560                       struct apply_pipeline_layout_state *state)
1561 {
1562    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1563 
1564    b->cursor = nir_before_instr(&intrin->instr);
1565 
1566    bool non_uniform = nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM;
1567    bool is_bindless;
1568    nir_def *handle =
1569       build_load_var_deref_surface_handle(b, deref, non_uniform,
1570                                           &is_bindless, state);
1571    nir_rewrite_image_intrinsic(intrin, handle, is_bindless);
1572 
1573    return true;
1574 }
1575 
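/** Lowers an image_deref_size intrinsic
 *
 * Non-3D images are handled by lower_image_intrinsic().  For 3D storage
 * images, the Z component of the result is additionally replaced with a
 * depth value loaded from the image's descriptor.
 */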
1576 static bool
1577 lower_image_size_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
1578                            struct apply_pipeline_layout_state *state)
1579 {
1580    if (nir_intrinsic_image_dim(intrin) != GLSL_SAMPLER_DIM_3D)
1581       return lower_image_intrinsic(b, intrin, state);
1582 
1583    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1584 
1585    b->cursor = nir_before_instr(&intrin->instr);
1586 
1587    bool non_uniform = nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM;
1588    bool is_bindless;
1589    nir_def *handle =
1590       build_load_var_deref_surface_handle(b, deref, non_uniform,
1591                                           &is_bindless, state);
1592    nir_rewrite_image_intrinsic(intrin, handle, is_bindless);
1593 
1594    nir_variable *var = nir_deref_instr_get_variable(deref);
1595    const uint32_t set = var->data.descriptor_set;
1596    const uint32_t binding = var->data.binding;
1597 
1598    nir_def *array_index;
1599    if (deref->deref_type != nir_deref_type_var) {
1600       assert(deref->deref_type == nir_deref_type_array);
1601       assert(nir_deref_instr_parent(deref)->deref_type == nir_deref_type_var);
1602       array_index = deref->arr.index.ssa;
1603    } else {
1604       array_index = nir_imm_int(b, 0);
1605    }
1606 
1607    nir_def *desc_addr = build_desc_addr_for_binding(
1608       b, set, binding, array_index, state);
1609 
1610    b->cursor = nir_after_instr(&intrin->instr);
1611 
1612    nir_def *image_depth =
1613       build_load_storage_3d_image_depth(b, desc_addr,
1614                                         nir_channel(b, &intrin->def, 2),
1615                                         state);
1616 
1617    nir_def *comps[4] = {};
1618    for (unsigned c = 0; c < intrin->def.num_components; c++)
1619       comps[c] = c == 2 ? image_depth : nir_channel(b, &intrin->def, c);
1620 
1621    nir_def *vec = nir_vec(b, comps, intrin->def.num_components);
1622    nir_def_rewrite_uses_after(&intrin->def, vec, vec->parent_instr);
1623 
1624    return true;
1625 }
1626 
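/** Lowers nir_intrinsic_load_constant
 *
 * The load is turned into a global constant load from the shader's constant
 * data section, whose address comes from relocation constants.  The offset
 * is clamped so the access always stays within constant_data_size.
 */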
1627 static bool
1628 lower_load_constant(nir_builder *b, nir_intrinsic_instr *intrin,
1629                     struct apply_pipeline_layout_state *state)
1630 {
1631    b->cursor = nir_instr_remove(&intrin->instr);
1632 
1633    /* Any constant-offset load_constant instructions should have been removed
1634     * by constant folding.
1635     */
1636    assert(!nir_src_is_const(intrin->src[0]));
1637    nir_def *offset = nir_iadd_imm(b, intrin->src[0].ssa,
1638                                       nir_intrinsic_base(intrin));
1639 
1640    unsigned load_size = intrin->def.num_components *
1641                         intrin->def.bit_size / 8;
1642    unsigned load_align = intrin->def.bit_size / 8;
1643 
1644    assert(load_size < b->shader->constant_data_size);
1645    unsigned max_offset = b->shader->constant_data_size - load_size;
1646    offset = nir_umin(b, offset, nir_imm_int(b, max_offset));
1647 
1648    nir_def *const_data_addr = nir_pack_64_2x32_split(b,
1649       nir_iadd(b,
1650          nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
1651          offset),
1652       nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
1653 
1654    nir_def *data =
1655       nir_load_global_constant(b, const_data_addr,
1656                                load_align,
1657                                intrin->def.num_components,
1658                                intrin->def.bit_size);
1659 
1660    nir_def_rewrite_uses(&intrin->def, data);
1661 
1662    return true;
1663 }
1664 
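/** Lowers nir_intrinsic_load_base_workgroup_id to a push constant load */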
1665 static bool
1666 lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
1667                         struct apply_pipeline_layout_state *state)
1668 {
1669    b->cursor = nir_instr_remove(&intrin->instr);
1670 
1671    nir_def *base_workgroup_id =
1672       nir_load_push_constant(b, 3, 32, nir_imm_int(b, 0),
1673                              .base = offsetof(struct anv_push_constants, cs.base_work_group_id),
1674                              .range = sizeof_field(struct anv_push_constants, cs.base_work_group_id));
1675    nir_def_rewrite_uses(&intrin->def, base_workgroup_id);
1676 
1677    return true;
1678 }
1679 
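/** Rewrites one texture/sampler deref source of a tex instruction
 *
 * The deref source is replaced with either a binding table offset or a
 * bindless handle, and the source type is updated accordingly.
 */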
1680 static void
1681 lower_tex_deref(nir_builder *b, nir_tex_instr *tex,
1682                 nir_tex_src_type deref_src_type,
1683                 unsigned base_index, unsigned plane,
1684                 struct apply_pipeline_layout_state *state)
1685 {
1686    int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
1687    if (deref_src_idx < 0)
1688       return;
1689 
1690    nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
1691    nir_variable *var = nir_deref_instr_get_variable(deref);
1692 
1693    const bool is_sampler = deref_src_type == nir_tex_src_sampler_deref;
1694    const unsigned set = var->data.descriptor_set;
1695    const unsigned binding = var->data.binding;
1696    const bool bindless = is_binding_bindless(set, binding, is_sampler, state);
1697 
1698    nir_def *array_index = NULL;
1699    if (deref->deref_type != nir_deref_type_var) {
1700       assert(deref->deref_type == nir_deref_type_array);
1701 
1702       array_index = deref->arr.index.ssa;
1703    } else {
1704       array_index = nir_imm_int(b, 0);
1705    }
1706 
1707    nir_tex_src_type offset_src_type;
1708    nir_def *index;
1709    if (deref_src_type == nir_tex_src_texture_deref) {
1710       index = build_surface_index_for_binding(b, set, binding, array_index,
1711                                               plane,
1712                                               tex->texture_non_uniform,
1713                                               state);
1714       offset_src_type = bindless ?
1715                         nir_tex_src_texture_handle :
1716                         nir_tex_src_texture_offset;
1717    } else {
1718       assert(deref_src_type == nir_tex_src_sampler_deref);
1719 
1720       index = build_sampler_handle_for_binding(b, set, binding, array_index,
1721                                                plane,
1722                                                tex->sampler_non_uniform,
1723                                                state);
1724       offset_src_type = bindless ?
1725                         nir_tex_src_sampler_handle :
1726                         nir_tex_src_sampler_offset;
1727    }
1728 
1729    nir_src_rewrite(&tex->src[deref_src_idx].src, index);
1730    tex->src[deref_src_idx].src_type = offset_src_type;
1731 }
1732 
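/* Returns the constant plane index of a tex instruction (0 if it has no
 * plane source) and removes the plane source from the instruction.
 */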
1733 static uint32_t
1734 tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
1735 {
1736    int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
1737    if (plane_src_idx < 0)
1738       return 0;
1739 
1740    unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src);
1741 
1742    nir_tex_instr_remove_src(tex, plane_src_idx);
1743 
1744    return plane;
1745 }
1746 
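/* Builds a balanced tree of bcsel instructions selecting srcs[idx] for idx
 * in [start, end), e.g. for four sources:
 *
 *    bcsel(idx < 2, bcsel(idx < 1, s0, s1), bcsel(idx < 3, s2, s3))
 */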
1747 static nir_def *
1748 build_def_array_select(nir_builder *b, nir_def **srcs, nir_def *idx,
1749                        unsigned start, unsigned end)
1750 {
1751    if (start == end - 1) {
1752       return srcs[start];
1753    } else {
1754       unsigned mid = start + (end - start) / 2;
1755       return nir_bcsel(b, nir_ilt_imm(b, idx, mid),
1756                        build_def_array_select(b, srcs, idx, start, mid),
1757                        build_def_array_select(b, srcs, idx, mid, end));
1758    }
1759 }
1760 
1761 static bool
1762 lower_tex(nir_builder *b, nir_tex_instr *tex,
1763           struct apply_pipeline_layout_state *state)
1764 {
1765    unsigned plane = tex_instr_get_and_remove_plane_src(tex);
1766 
1767    b->cursor = nir_before_instr(&tex->instr);
1768 
1769    lower_tex_deref(b, tex, nir_tex_src_texture_deref,
1770                    tex->texture_index, plane, state);
1771    lower_tex_deref(b, tex, nir_tex_src_sampler_deref,
1772                    tex->sampler_index, plane, state);
1773 
1774    /* The whole lot will be embedded in the offset/handle source */
1775    tex->texture_index = 0;
1776    tex->sampler_index = 0;
1777 
1778    return true;
1779 }
1780 
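/** Lowers nir_intrinsic_load_ray_query_global_intel to a push constant load */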
1781 static bool
1782 lower_ray_query_globals(nir_builder *b, nir_intrinsic_instr *intrin,
1783                         struct apply_pipeline_layout_state *state)
1784 {
1785    b->cursor = nir_instr_remove(&intrin->instr);
1786 
1787    nir_def *rq_globals =
1788       nir_load_push_constant(b, 1, 64, nir_imm_int(b, 0),
1789                              .base = offsetof(struct anv_push_constants, ray_query_globals),
1790                              .range = sizeof_field(struct anv_push_constants, ray_query_globals));
1791    nir_def_rewrite_uses(&intrin->def, rq_globals);
1792 
1793    return true;
1794 }
1795 
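/* Per-instruction callback for the main lowering pass: dispatches resource,
 * image, constant and texture instructions to the lowering helpers above.
 */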
1796 static bool
1797 apply_pipeline_layout(nir_builder *b, nir_instr *instr, void *_state)
1798 {
1799    struct apply_pipeline_layout_state *state = _state;
1800 
1801    switch (instr->type) {
1802    case nir_instr_type_intrinsic: {
1803       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1804       switch (intrin->intrinsic) {
1805       case nir_intrinsic_vulkan_resource_index:
1806          return lower_res_index_intrinsic(b, intrin, state);
1807       case nir_intrinsic_vulkan_resource_reindex:
1808          return lower_res_reindex_intrinsic(b, intrin, state);
1809       case nir_intrinsic_load_vulkan_descriptor:
1810          return lower_load_vulkan_descriptor(b, intrin, state);
1811       case nir_intrinsic_get_ssbo_size:
1812          return lower_get_ssbo_size(b, intrin, state);
1813       case nir_intrinsic_image_deref_load:
1814       case nir_intrinsic_image_deref_store:
1815       case nir_intrinsic_image_deref_atomic:
1816       case nir_intrinsic_image_deref_atomic_swap:
1817       case nir_intrinsic_image_deref_samples:
1818       case nir_intrinsic_image_deref_load_param_intel:
1819       case nir_intrinsic_image_deref_load_raw_intel:
1820       case nir_intrinsic_image_deref_store_raw_intel:
1821       case nir_intrinsic_image_deref_sparse_load:
1822          return lower_image_intrinsic(b, intrin, state);
1823       case nir_intrinsic_image_deref_size:
1824          return lower_image_size_intrinsic(b, intrin, state);
1825       case nir_intrinsic_load_constant:
1826          return lower_load_constant(b, intrin, state);
1827       case nir_intrinsic_load_base_workgroup_id:
1828          return lower_base_workgroup_id(b, intrin, state);
1829       case nir_intrinsic_load_ray_query_global_intel:
1830          return lower_ray_query_globals(b, intrin, state);
1831       default:
1832          return false;
1833       }
1834       break;
1835    }
1836    case nir_instr_type_tex:
1837       return lower_tex(b, nir_instr_as_tex(instr), state);
1838    default:
1839       return false;
1840    }
1841 }
1842 
1843 struct binding_info {
1844    uint32_t binding;
1845    uint8_t set;
1846    uint16_t score;
1847 };
1848 
1849 static int
1850 compare_binding_infos(const void *_a, const void *_b)
1851 {
1852    const struct binding_info *a = _a, *b = _b;
1853    if (a->score != b->score)
1854       return b->score - a->score;
1855 
1856    if (a->set != b->set)
1857       return a->set - b->set;
1858 
1859    return a->binding - b->binding;
1860 }
1861 
1862 #ifndef NDEBUG
1863 static void
1864 anv_validate_pipeline_layout(const struct anv_pipeline_sets_layout *layout,
1865                              nir_shader *shader)
1866 {
1867    nir_foreach_function_impl(impl, shader) {
1868       nir_foreach_block(block, impl) {
1869          nir_foreach_instr(instr, block) {
1870             if (instr->type != nir_instr_type_intrinsic)
1871                continue;
1872 
1873             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1874             if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
1875                continue;
1876 
1877             unsigned set = nir_intrinsic_desc_set(intrin);
1878             assert(layout->set[set].layout);
1879          }
1880       }
1881    }
1882 }
1883 #endif
1884 
1885 static bool
1886 binding_is_promotable_to_push(const struct anv_descriptor_set_binding_layout *bind_layout)
1887 {
1888    return (bind_layout->flags & non_pushable_binding_flags) == 0;
1889 }
1890 
1891 static void
1892 add_null_bti_entry(struct anv_pipeline_bind_map *map)
1893 {
1894    map->surface_to_descriptor[map->surface_count++] =
1895       (struct anv_pipeline_binding) {
1896          .set = ANV_DESCRIPTOR_SET_NULL,
1897    };
1898    assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
1899 }
1900 
1901 static void
1902 add_bti_entry(struct anv_pipeline_bind_map *map,
1903               uint32_t set,
1904               uint32_t binding,
1905               uint32_t element,
1906               uint32_t plane,
1907               const struct anv_descriptor_set_binding_layout *bind_layout)
1908 {
1909    map->surface_to_descriptor[map->surface_count++] =
1910       (struct anv_pipeline_binding) {
1911          .set = set,
1912          .binding = binding,
1913          .index = bind_layout->descriptor_index + element,
1914          .set_offset = bind_layout->descriptor_surface_offset +
1915                        element * bind_layout->descriptor_surface_stride +
1916                        plane * bind_layout->descriptor_data_surface_size,
1917          .plane = plane,
1918    };
1919    assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
1920 }
1921 
1922 static void
1923 add_dynamic_bti_entry(struct anv_pipeline_bind_map *map,
1924                       uint32_t set,
1925                       uint32_t binding,
1926                       uint32_t element,
1927                       const struct anv_pipeline_sets_layout *layout,
1928                       const struct anv_descriptor_set_binding_layout *bind_layout)
1929 {
1930    map->surface_to_descriptor[map->surface_count++] =
1931       (struct anv_pipeline_binding) {
1932          .set = set,
1933          .binding = binding,
1934          .index = bind_layout->descriptor_index + element,
1935          .set_offset = bind_layout->descriptor_surface_offset +
1936                        element * bind_layout->descriptor_surface_stride,
1937          .dynamic_offset_index = bind_layout->dynamic_offset_index + element,
1938    };
1939    assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
1940 }
1941 
1942 static void
1943 add_sampler_entry(struct anv_pipeline_bind_map *map,
1944                   uint32_t set,
1945                   uint32_t binding,
1946                   uint32_t element,
1947                   uint32_t plane,
1948                   const struct anv_pipeline_sets_layout *layout,
1949                   const struct anv_descriptor_set_binding_layout *bind_layout)
1950 {
1951    assert((bind_layout->descriptor_index + element) < layout->set[set].layout->descriptor_count);
1952    map->sampler_to_descriptor[map->sampler_count++] =
1953       (struct anv_pipeline_binding) {
1954          .set = set,
1955          .binding = binding,
1956          .index = bind_layout->descriptor_index + element,
1957          .plane = plane,
1958    };
1959 }
1960 
1961 static void
1962 add_push_entry(struct anv_pipeline_push_map *push_map,
1963                uint32_t set,
1964                uint32_t binding,
1965                uint32_t element,
1966                const struct anv_pipeline_sets_layout *layout,
1967                const struct anv_descriptor_set_binding_layout *bind_layout)
1968 {
1969    push_map->block_to_descriptor[push_map->block_count++] =
1970       (struct anv_pipeline_binding) {
1971          .set = set,
1972          .binding = binding,
1973          .index = bind_layout->descriptor_index + element,
1974          .dynamic_offset_index = bind_layout->dynamic_offset_index + element,
1975    };
1976 }
1977 
1978 static bool
1979 binding_should_use_surface_binding_table(const struct apply_pipeline_layout_state *state,
1980                                          const struct anv_descriptor_set_binding_layout *binding)
1981 {
1982    if ((binding->data & ANV_DESCRIPTOR_BTI_SURFACE_STATE) == 0)
1983       return false;
1984 
1985    if (state->pdevice->always_use_bindless &&
1986        (binding->data & ANV_DESCRIPTOR_SURFACE))
1987       return false;
1988 
1989    return true;
1990 }
1991 
1992 static bool
1993 binding_should_use_sampler_binding_table(const struct apply_pipeline_layout_state *state,
1994                                          const struct anv_descriptor_set_binding_layout *binding)
1995 {
1996    if ((binding->data & ANV_DESCRIPTOR_BTI_SAMPLER_STATE) == 0)
1997       return false;
1998 
1999    if (state->pdevice->always_use_bindless &&
2000        (binding->data & ANV_DESCRIPTOR_SAMPLER))
2001       return false;
2002 
2003    return true;
2004 }
2005 
2006 void
2007 anv_nir_apply_pipeline_layout(nir_shader *shader,
2008                               const struct anv_physical_device *pdevice,
2009                               enum brw_robustness_flags robust_flags,
2010                               bool independent_sets,
2011                               const struct anv_pipeline_sets_layout *layout,
2012                               struct anv_pipeline_bind_map *map,
2013                               struct anv_pipeline_push_map *push_map,
2014                               void *push_map_mem_ctx)
2015 {
2016    void *mem_ctx = ralloc_context(NULL);
2017 
2018 #ifndef NDEBUG
2019    /* We should not have any reference to a descriptor set that is not
2020     * given through the pipeline layout (layout->set[set].layout = NULL).
2021     */
2022    anv_validate_pipeline_layout(layout, shader);
2023 #endif
2024 
2025    const bool bindless_stage =
2026       brw_shader_stage_requires_bindless_resources(shader->info.stage);
2027    struct apply_pipeline_layout_state state = {
2028       .pdevice = pdevice,
2029       .layout = layout,
2030       .desc_addr_format = bindless_stage ?
2031                           nir_address_format_64bit_global_32bit_offset :
2032                           nir_address_format_32bit_index_offset,
2033       .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_flags),
2034       .ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_flags),
2035       .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
2036       .has_independent_sets = independent_sets,
2037    };
2038 
2039    /* Compute the number of push block items required. */
2040    unsigned push_block_count = 0;
2041    for (unsigned s = 0; s < layout->num_sets; s++) {
2042       if (!layout->set[s].layout)
2043          continue;
2044 
2045       const unsigned count = layout->set[s].layout->binding_count;
2046       state.set[s].binding = rzalloc_array_size(mem_ctx, sizeof(state.set[s].binding[0]), count);
2047 
2048       const struct anv_descriptor_set_layout *set_layout = layout->set[s].layout;
2049       for (unsigned b = 0; b < set_layout->binding_count; b++) {
2050          if (set_layout->binding[b].type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
2051             push_block_count += set_layout->binding[b].array_size;
2052       }
2053    }
2054 
2055    /* Find all used sets/bindings */
2056    nir_shader_instructions_pass(shader, get_used_bindings,
2057                                 nir_metadata_all, &state);
2058 
2059    /* Assign a BTI to each used descriptor set */
2060    for (unsigned s = 0; s < layout->num_sets; s++) {
2061       if (state.desc_addr_format != nir_address_format_32bit_index_offset) {
2062          state.set[s].desc_offset = BINDLESS_OFFSET;
2063       } else if (state.set[s].desc_buffer_used) {
2064          map->surface_to_descriptor[map->surface_count] =
2065             (struct anv_pipeline_binding) {
2066                .set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
2067                .binding = UINT32_MAX,
2068                .index = s,
2069             };
2070          state.set[s].desc_offset = map->surface_count++;
2071       }
2072    }
2073 
2074    /* Assign a block index for each surface */
2075    push_map->block_to_descriptor =
2076       rzalloc_array(push_map_mem_ctx, struct anv_pipeline_binding,
2077                     map->surface_count + push_block_count);
2078 
2079    memcpy(push_map->block_to_descriptor,
2080           map->surface_to_descriptor,
2081           sizeof(push_map->block_to_descriptor[0]) * map->surface_count);
2082    push_map->block_count = map->surface_count;
2083 
2084    /* Count used bindings and add push blocks for promotion to push
2085     * constants
2086     */
2087    unsigned used_binding_count = 0;
2088    for (uint32_t set = 0; set < layout->num_sets; set++) {
2089       struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
2090       if (!set_layout)
2091          continue;
2092 
2093       for (unsigned b = 0; b < set_layout->binding_count; b++) {
2094          if (state.set[set].binding[b].use_count == 0)
2095             continue;
2096 
2097          used_binding_count++;
2098 
2099          const struct anv_descriptor_set_binding_layout *bind_layout =
2100             &set_layout->binding[b];
2101          if (!binding_is_promotable_to_push(bind_layout))
2102             continue;
2103 
2104          if (bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
2105             state.set[set].binding[b].push_block = push_map->block_count;
2106             for (unsigned i = 0; i < bind_layout->array_size; i++)
2107                add_push_entry(push_map, set, b, i, layout, bind_layout);
2108          } else {
2109             state.set[set].binding[b].push_block = state.set[set].desc_offset;
2110          }
2111       }
2112    }
2113 
2114    struct binding_info *infos =
2115       rzalloc_array(mem_ctx, struct binding_info, used_binding_count);
2116    used_binding_count = 0;
2117    for (uint32_t set = 0; set < layout->num_sets; set++) {
2118       const struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
2119       if (!set_layout)
2120          continue;
2121 
2122       for (unsigned b = 0; b < set_layout->binding_count; b++) {
2123          if (state.set[set].binding[b].use_count == 0)
2124             continue;
2125 
2126          const struct anv_descriptor_set_binding_layout *binding =
2127                &layout->set[set].layout->binding[b];
2128 
2129          /* Do a fixed-point calculation to generate a score based on the
2130           * number of uses and the binding array size.  We shift by 7 instead
2131           * of 8 because we're going to use the top bit below to give
2132           * everything which does not support bindless a higher priority
2133           * than things which do.
2134           */
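         /* For example, a binding with use_count 4 and array_size 2 scores
          * (4 << 7) / 2 = 256 before the bindless bit is applied.
          */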
2135          uint16_t score = ((uint16_t)state.set[set].binding[b].use_count << 7) /
2136                           binding->array_size;
2137 
2138          /* If the descriptor type doesn't support bindless then put it at the
2139           * beginning so we guarantee it gets a slot.
2140           */
2141          if (!anv_descriptor_supports_bindless(pdevice, binding, true) ||
2142              !anv_descriptor_supports_bindless(pdevice, binding, false))
2143             score |= 1 << 15;
2144 
2145          infos[used_binding_count++] = (struct binding_info) {
2146             .set = set,
2147             .binding = b,
2148             .score = score,
2149          };
2150       }
2151    }
2152 
2153    /* Order the binding infos based on score with highest scores first.  If
2154     * scores are equal we then order by set and binding.
2155     */
2156    qsort(infos, used_binding_count, sizeof(struct binding_info),
2157          compare_binding_infos);
2158 
2159    for (unsigned i = 0; i < used_binding_count; i++) {
2160       unsigned set = infos[i].set, b = infos[i].binding;
2161       assert(layout->set[set].layout);
2162       const struct anv_descriptor_set_binding_layout *binding =
2163             &layout->set[set].layout->binding[b];
2164 
2165       const uint32_t array_size = binding->array_size;
2166 
2167       if (binding->dynamic_offset_index >= 0)
2168          state.has_dynamic_buffers = true;
2169 
2170       const unsigned array_multiplier = bti_multiplier(&state, set, b);
2171       assert(array_multiplier >= 1);
2172 
2173       /* Assume bindless by default */
2174       state.set[set].binding[b].surface_offset = BINDLESS_OFFSET;
2175       state.set[set].binding[b].sampler_offset = BINDLESS_OFFSET;
2176 
2177       if (binding_should_use_surface_binding_table(&state, binding)) {
2178          if (map->surface_count + array_size * array_multiplier > MAX_BINDING_TABLE_SIZE ||
2179              anv_descriptor_requires_bindless(pdevice, binding, false) ||
2180              brw_shader_stage_requires_bindless_resources(shader->info.stage)) {
2181             /* If this descriptor doesn't fit in the binding table or if it
2182              * requires bindless for some reason, flag it as bindless.
2183              */
2184             assert(anv_descriptor_supports_bindless(pdevice, binding, false));
2185          } else {
2186             state.set[set].binding[b].surface_offset = map->surface_count;
2187             if (binding->dynamic_offset_index < 0) {
2188                struct anv_sampler **samplers = binding->immutable_samplers;
2189                uint8_t max_planes = bti_multiplier(&state, set, b);
2190                for (unsigned i = 0; i < binding->array_size; i++) {
2191                   uint8_t planes = samplers ? samplers[i]->n_planes : 1;
2192                   for (uint8_t p = 0; p < max_planes; p++) {
2193                      if (p < planes) {
2194                         add_bti_entry(map, set, b, i, p, binding);
2195                      } else {
2196                         add_null_bti_entry(map);
2197                      }
2198                   }
2199                }
2200             } else {
2201                for (unsigned i = 0; i < binding->array_size; i++)
2202                   add_dynamic_bti_entry(map, set, b, i, layout, binding);
2203             }
2204          }
2205          assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
2206       }
2207 
2208       if (binding_should_use_sampler_binding_table(&state, binding)) {
2209          if (map->sampler_count + array_size * array_multiplier > MAX_SAMPLER_TABLE_SIZE ||
2210              anv_descriptor_requires_bindless(pdevice, binding, true) ||
2211              brw_shader_stage_requires_bindless_resources(shader->info.stage)) {
2212             /* If this descriptor doesn't fit in the binding table or if it
2213              * requires bindless for some reason, flag it as bindless.
2214              *
2215              * We also make large sampler arrays bindless because we can avoid
2216              * using indirect sends thanks to bindless samplers being packed
2217              * less tightly than the sampler table.
2218              */
2219             assert(anv_descriptor_supports_bindless(pdevice, binding, true));
2220          } else {
2221             state.set[set].binding[b].sampler_offset = map->sampler_count;
2222             uint8_t max_planes = bti_multiplier(&state, set, b);
2223             for (unsigned i = 0; i < binding->array_size; i++) {
2224                for (uint8_t p = 0; p < max_planes; p++) {
2225                   add_sampler_entry(map, set, b, i, p, layout, binding);
2226                }
2227             }
2228          }
2229       }
2230 
2231       if (binding->data & ANV_DESCRIPTOR_INLINE_UNIFORM) {
2232          state.set[set].binding[b].surface_offset = state.set[set].desc_offset;
2233       }
2234 
2235 #if 0
2236       fprintf(stderr, "set=%u binding=%u surface_offset=0x%08x require_bindless=%u type=%s\n",
2237               set, b,
2238               state.set[set].binding[b].surface_offset,
2239               anv_descriptor_requires_bindless(pdevice, binding, false),
2240               vk_DescriptorType_to_str(binding->type));
2241 #endif
2242    }
2243 
2244    /* Before we do the normal lowering, we look for any SSBO operations
2245     * that we can lower to the BTI model and lower them up-front.  The BTI
2246     * model can perform better than the A64 model for a couple reasons:
2247     *
2248     *  1. 48-bit address calculations are potentially expensive and using
2249     *     the BTI model lets us simply compute 32-bit offsets and the
2250     *     hardware adds the 64-bit surface base address.
2251     *
2252     *  2. The BTI messages, because they use surface states, do bounds
2253     *     checking for us.  With the A64 model, we have to do our own
2254     *     bounds checking and this means wider pointers and extra
2255     *     calculations and branching in the shader.
2256     *
2257     * The solution to both of these is to convert things to the BTI model
2258     * opportunistically.  We need to do this as a pre-pass for two
2259     * reasons:
2260     *
2261     *  1. The BTI model requires nir_address_format_32bit_index_offset
2262     *     pointers which are not the same type as the pointers needed for
2263     *     the A64 model.  Because all our derefs are set up for the A64
2264     *     model (in case we have variable pointers), we have to crawl all
2265     *     the way back to the vulkan_resource_index intrinsic and build a
2266     *     completely fresh index+offset calculation.
2267     *
2268     *  2. Because the variable-pointers-capable lowering that we do as part
2269     *     of apply_pipeline_layout() is destructive (it really has to
2270     *     be to handle variable pointers properly), we've lost the deref
2271     *     information by the time we get to the load/store/atomic
2272     *     intrinsics in that pass.
2273     */
2274    nir_shader_instructions_pass(shader, lower_direct_buffer_instr,
2275                                 nir_metadata_block_index |
2276                                 nir_metadata_dominance,
2277                                 &state);
2278 
2279    /* We just got rid of all the direct access.  Delete it so it's not in the
2280     * way when we do our indirect lowering.
2281     */
2282    nir_opt_dce(shader);
2283 
2284    nir_shader_instructions_pass(shader, apply_pipeline_layout,
2285                                 nir_metadata_block_index |
2286                                 nir_metadata_dominance,
2287                                 &state);
2288 
2289    ralloc_free(mem_ctx);
2290 
2291    if (brw_shader_stage_is_bindless(shader->info.stage)) {
2292       assert(map->surface_count == 0);
2293       assert(map->sampler_count == 0);
2294    }
2295 
2296 #if 0
2297    fprintf(stderr, "bti:\n");
2298    for (unsigned i = 0; i < map->surface_count; i++) {
2299       fprintf(stderr, "  %03i: set=%03u binding=%06i index=%u plane=%u set_offset=0x%08x dyn_offset=0x%08x\n", i,
2300               map->surface_to_descriptor[i].set,
2301               map->surface_to_descriptor[i].binding,
2302               map->surface_to_descriptor[i].index,
2303               map->surface_to_descriptor[i].plane,
2304               map->surface_to_descriptor[i].set_offset,
2305               map->surface_to_descriptor[i].dynamic_offset_index);
2306    }
2307    fprintf(stderr, "sti:\n");
2308    for (unsigned i = 0; i < map->sampler_count; i++) {
2309       fprintf(stderr, "  %03i: set=%03u binding=%06i index=%u plane=%u\n", i,
2310               map->sampler_to_descriptor[i].set,
2311               map->sampler_to_descriptor[i].binding,
2312               map->sampler_to_descriptor[i].index,
2313               map->sampler_to_descriptor[i].plane);
2314    }
2315 #endif
2316 
2317    /* Now that we're done computing the surface and sampler portions of the
2318     * bind map, hash them.  This lets us quickly determine if the actual
2319     * mapping has changed and not just a no-op pipeline change.
2320     */
2321    _mesa_sha1_compute(map->surface_to_descriptor,
2322                       map->surface_count * sizeof(struct anv_pipeline_binding),
2323                       map->surface_sha1);
2324    _mesa_sha1_compute(map->sampler_to_descriptor,
2325                       map->sampler_count * sizeof(struct anv_pipeline_binding),
2326                       map->sampler_sha1);
2327 }
2328