1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "anv_nir.h"
25 #include "nir/nir_builder.h"
26 #include "compiler/brw_nir.h"
27 #include "util/mesa-sha1.h"
28 #include "util/set.h"
29
30 #include "vk_enum_to_str.h"
31
32 #include "genxml/genX_bits.h"
33
34 /* Sampler tables don't actually have a maximum size but we pick one just so
35 * that we don't end up emitting too much state on-the-fly.
36 */
37 #define MAX_SAMPLER_TABLE_SIZE 128
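/* Sentinel stored in a binding's surface/sampler offset to mark it as
 * accessed through the bindless heap rather than a binding table entry
 * (see is_binding_bindless()).
 */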
38 #define BINDLESS_OFFSET 255
39
40 #define sizeof_field(type, field) sizeof(((type *)0)->field)
41
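/* Per-binding properties gathered while scanning the shader:
 * BINDING_PROPERTY_NORMAL marks a binding referenced by the shader (see
 * add_binding()), BINDING_PROPERTY_PUSHABLE marks a binding whose data may
 * be promoted to push constants (see add_binding_type()).
 */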
42 enum binding_property {
43 BINDING_PROPERTY_NORMAL = BITFIELD_BIT(0),
44 BINDING_PROPERTY_PUSHABLE = BITFIELD_BIT(1),
45 };
46
47 struct apply_pipeline_layout_state {
48 const struct anv_physical_device *pdevice;
49
50 const struct anv_pipeline_sets_layout *layout;
51 nir_address_format desc_addr_format;
52 nir_address_format ssbo_addr_format;
53 nir_address_format ubo_addr_format;
54
55 /* Place to flag lowered instructions so we don't lower them twice */
56 struct set *lowered_instrs;
57
58 bool uses_constants;
59 bool has_dynamic_buffers;
60 bool has_independent_sets;
61 uint8_t constants_offset;
62 struct {
63 bool desc_buffer_used;
64 uint8_t desc_offset;
65
66 struct {
67 uint8_t use_count;
68
69 /* Binding table offset */
70 uint8_t surface_offset;
71
72 /* Sampler table offset */
73 uint8_t sampler_offset;
74
75 /* Properties of the binding */
76 enum binding_property properties;
77
78 /* Each binding is identified with a unique identifier for push
79 * computation.
80 */
81 uint32_t push_block;
82 } *binding;
83 } set[MAX_SETS];
84 };
85
86 /* For a given binding, tells us how many binding table entries are needed per
87 * element.
88 */
89 static uint32_t
90 bti_multiplier(const struct apply_pipeline_layout_state *state,
91 uint32_t set, uint32_t binding)
92 {
93 const struct anv_descriptor_set_layout *set_layout =
94 state->layout->set[set].layout;
95 const struct anv_descriptor_set_binding_layout *bind_layout =
96 &set_layout->binding[binding];
97
98 return bind_layout->max_plane_count == 0 ? 1 : bind_layout->max_plane_count;
99 }
100
101 static nir_address_format
102 addr_format_for_desc_type(VkDescriptorType desc_type,
103 struct apply_pipeline_layout_state *state)
104 {
105 switch (desc_type) {
106 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
107 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
108 return state->ssbo_addr_format;
109
110 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
111 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
112 return state->ubo_addr_format;
113
114 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
115 return state->desc_addr_format;
116
117 default:
118 unreachable("Unsupported descriptor type");
119 }
120 }
121
122 static void
123 add_binding(struct apply_pipeline_layout_state *state,
124 uint32_t set, uint32_t binding)
125 {
126 const struct anv_descriptor_set_binding_layout *bind_layout =
127 &state->layout->set[set].layout->binding[binding];
128
129 assert(set < state->layout->num_sets);
130 assert(binding < state->layout->set[set].layout->binding_count);
131
132 if (state->set[set].binding[binding].use_count < UINT8_MAX)
133 state->set[set].binding[binding].use_count++;
134
135 /* Only flag the descriptor buffer as used if there's actually data for
136 * this binding. This lets us be lazy and call this function constantly
137 * without worrying about unnecessarily enabling the buffer.
138 */
139 if (bind_layout->descriptor_surface_stride)
140 state->set[set].desc_buffer_used = true;
141
142 if (bind_layout->dynamic_offset_index >= 0)
143 state->has_dynamic_buffers = true;
144
145 state->set[set].binding[binding].properties |= BINDING_PROPERTY_NORMAL;
146 }
147
148 const VkDescriptorBindingFlags non_pushable_binding_flags =
149 VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT |
150 VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT |
151 VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT;
152
153 static void
154 add_binding_type(struct apply_pipeline_layout_state *state,
155 uint32_t set, uint32_t binding, VkDescriptorType type)
156 {
157 add_binding(state, set, binding);
158
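/* Flag the binding as pushable if it does not use any of the non-pushable
 * flags and resolves to a uniform buffer or inline uniform block, either
 * directly or through a mutable descriptor type.
 */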
159 if ((state->layout->set[set].layout->binding[binding].flags &
160 non_pushable_binding_flags) == 0 &&
161 (state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
162 state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
163 state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ||
164 state->layout->set[set].layout->binding[binding].type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) &&
165 (type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
166 type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK))
167 state->set[set].binding[binding].properties |= BINDING_PROPERTY_PUSHABLE;
168 }
169
170 static void
171 add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src)
172 {
173 nir_deref_instr *deref = nir_src_as_deref(src);
174 nir_variable *var = nir_deref_instr_get_variable(deref);
175 add_binding(state, var->data.descriptor_set, var->data.binding);
176 }
177
178 static void
179 add_tex_src_binding(struct apply_pipeline_layout_state *state,
180 nir_tex_instr *tex, nir_tex_src_type deref_src_type)
181 {
182 int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
183 if (deref_src_idx < 0)
184 return;
185
186 add_deref_src_binding(state, tex->src[deref_src_idx].src);
187 }
188
189 static bool
190 get_used_bindings(UNUSED nir_builder *_b, nir_instr *instr, void *_state)
191 {
192 struct apply_pipeline_layout_state *state = _state;
193
194 switch (instr->type) {
195 case nir_instr_type_intrinsic: {
196 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
197 switch (intrin->intrinsic) {
198 case nir_intrinsic_vulkan_resource_index:
199 add_binding_type(state,
200 nir_intrinsic_desc_set(intrin),
201 nir_intrinsic_binding(intrin),
202 nir_intrinsic_desc_type(intrin));
203 break;
204
205 case nir_intrinsic_image_deref_load:
206 case nir_intrinsic_image_deref_store:
207 case nir_intrinsic_image_deref_atomic:
208 case nir_intrinsic_image_deref_atomic_swap:
209 case nir_intrinsic_image_deref_size:
210 case nir_intrinsic_image_deref_samples:
211 case nir_intrinsic_image_deref_load_param_intel:
212 case nir_intrinsic_image_deref_load_raw_intel:
213 case nir_intrinsic_image_deref_store_raw_intel:
214 case nir_intrinsic_image_deref_sparse_load:
215 add_deref_src_binding(state, intrin->src[0]);
216 break;
217
218 case nir_intrinsic_load_constant:
219 state->uses_constants = true;
220 break;
221
222 default:
223 break;
224 }
225 break;
226 }
227 case nir_instr_type_tex: {
228 nir_tex_instr *tex = nir_instr_as_tex(instr);
229 add_tex_src_binding(state, tex, nir_tex_src_texture_deref);
230 add_tex_src_binding(state, tex, nir_tex_src_sampler_deref);
231 break;
232 }
233 default:
234 break;
235 }
236
237 return false;
238 }
239
240 static nir_intrinsic_instr *
241 find_descriptor_for_index_src(nir_src src,
242 struct apply_pipeline_layout_state *state)
243 {
244 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);
245
246 while (intrin && intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex)
247 intrin = nir_src_as_intrinsic(intrin->src[0]);
248
249 if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
250 return NULL;
251
252 return intrin;
253 }
254
255 static bool
256 descriptor_has_bti(nir_intrinsic_instr *intrin,
257 struct apply_pipeline_layout_state *state)
258 {
259 assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
260
261 uint32_t set = nir_intrinsic_desc_set(intrin);
262 uint32_t binding = nir_intrinsic_binding(intrin);
263 const struct anv_descriptor_set_binding_layout *bind_layout =
264 &state->layout->set[set].layout->binding[binding];
265
266 uint32_t surface_index;
267 if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM)
268 surface_index = state->set[set].desc_offset;
269 else
270 surface_index = state->set[set].binding[binding].surface_offset;
271
272 /* Only lower to a BTI message if we have a valid binding table index. */
273 return surface_index < MAX_BINDING_TABLE_SIZE;
274 }
275
276 static nir_address_format
277 descriptor_address_format(nir_intrinsic_instr *intrin,
278 struct apply_pipeline_layout_state *state)
279 {
280 assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
281
282 return addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);
283 }
284
285 static nir_intrinsic_instr *
286 nir_deref_find_descriptor(nir_deref_instr *deref,
287 struct apply_pipeline_layout_state *state)
288 {
289 while (1) {
290 /* Nothing we will use this on has a variable */
291 assert(deref->deref_type != nir_deref_type_var);
292
293 nir_deref_instr *parent = nir_src_as_deref(deref->parent);
294 if (!parent)
295 break;
296
297 deref = parent;
298 }
299 assert(deref->deref_type == nir_deref_type_cast);
300
301 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent);
302 if (!intrin || intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
303 return NULL;
304
305 return find_descriptor_for_index_src(intrin->src[0], state);
306 }
307
308 static nir_def *
309 build_load_descriptor_mem(nir_builder *b,
310 nir_def *desc_addr, unsigned desc_offset,
311 unsigned num_components, unsigned bit_size,
312 const struct apply_pipeline_layout_state *state)
314 {
315 switch (state->desc_addr_format) {
316 case nir_address_format_64bit_global_32bit_offset: {
317 nir_def *base_addr =
318 nir_pack_64_2x32(b, nir_trim_vector(b, desc_addr, 2));
319 nir_def *offset32 =
320 nir_iadd_imm(b, nir_channel(b, desc_addr, 3), desc_offset);
321
322 return nir_load_global_constant_offset(b, num_components, bit_size,
323 base_addr, offset32,
324 .align_mul = 8,
325 .align_offset = desc_offset % 8);
326 }
327
328 case nir_address_format_32bit_index_offset: {
329 nir_def *surface_index = nir_channel(b, desc_addr, 0);
330 nir_def *offset32 =
331 nir_iadd_imm(b, nir_channel(b, desc_addr, 1), desc_offset);
332
333 return nir_load_ubo(b, num_components, bit_size,
334 surface_index, offset32,
335 .align_mul = 8,
336 .align_offset = desc_offset % 8,
337 .range_base = 0,
338 .range = num_components * bit_size / 8);
339 }
340
341 default:
342 unreachable("Unsupported address format");
343 }
344 }
345
346 /* When using direct descriptors, we do not have a structure to read in memory
347 * like anv_address_range_descriptor where all the fields match perfectly the
348 * vec4 address format we need to generate for A64 messages. Instead we need
349 * to build the vec4 by parsing the RENDER_SURFACE_STATE structure. That is easy
350 * enough for the surface address, but a lot less fun for the size, where you
351 * have to combine 3 fields scattered over multiple dwords, add one to the total
352 * and check the surface type to deal with null descriptors.
353 *
354 * Fortunately we can reuse the Auxiliary surface address field to stash our
355 * buffer size and just load a vec4.
356 */
357 static nir_def *
358 build_optimized_load_render_surface_state_address(nir_builder *b,
359 nir_def *desc_addr,
360 struct apply_pipeline_layout_state *state)
362 {
363 const struct intel_device_info *devinfo = &state->pdevice->info;
364
365 nir_def *surface_addr =
366 build_load_descriptor_mem(b, desc_addr,
367 RENDER_SURFACE_STATE_SurfaceBaseAddress_start(devinfo) / 8,
368 4, 32, state);
369 nir_def *addr_ldw = nir_channel(b, surface_addr, 0);
370 nir_def *addr_udw = nir_channel(b, surface_addr, 1);
371 nir_def *length = nir_channel(b, surface_addr, 3);
372
373 return nir_vec4(b, addr_ldw, addr_udw, length, nir_imm_int(b, 0));
374 }
375
376 /* When using direct descriptors, we do not have a structure to read in memory
377 * like anv_address_range_descriptor where all the fields match perfectly the
378 * vec4 address format we need to generate for A64 messages. Instead we need
379 * to build the vec4 by parsing the RENDER_SURFACE_STATE structure. That is easy
380 * enough for the surface address, but a lot less fun for the size.
381 */
382 static nir_def *
383 build_non_optimized_load_render_surface_state_address(nir_builder *b,
384 nir_def *desc_addr,
385 struct apply_pipeline_layout_state *state)
387 {
388 const struct intel_device_info *devinfo = &state->pdevice->info;
389
390 assert(((RENDER_SURFACE_STATE_SurfaceBaseAddress_start(devinfo) +
391 RENDER_SURFACE_STATE_SurfaceBaseAddress_bits(devinfo) - 1) -
392 RENDER_SURFACE_STATE_Width_start(devinfo)) / 8 <= 32);
393
394 nir_def *surface_addr =
395 build_load_descriptor_mem(b, desc_addr,
396 RENDER_SURFACE_STATE_SurfaceBaseAddress_start(devinfo) / 8,
397 DIV_ROUND_UP(RENDER_SURFACE_STATE_SurfaceBaseAddress_bits(devinfo), 32),
398 32, state);
399 nir_def *addr_ldw = nir_channel(b, surface_addr, 0);
400 nir_def *addr_udw = nir_channel(b, surface_addr, 1);
401
402 /* Take all the RENDER_SURFACE_STATE fields from the beginning of the
403 * structure up to the Depth field.
404 */
405 const uint32_t type_sizes_dwords =
406 DIV_ROUND_UP(RENDER_SURFACE_STATE_Depth_start(devinfo) +
407 RENDER_SURFACE_STATE_Depth_bits(devinfo), 32);
408 nir_def *type_sizes =
409 build_load_descriptor_mem(b, desc_addr, 0, type_sizes_dwords, 32, state);
410
411 const unsigned width_start = RENDER_SURFACE_STATE_Width_start(devinfo);
412 /* SKL PRMs, Volume 2d: Command Reference: Structures, RENDER_SURFACE_STATE
413 *
414 * Width: "bits [6:0] of the number of entries in the buffer - 1"
415 * Height: "bits [20:7] of the number of entries in the buffer - 1"
416 * Depth: "bits [31:21] of the number of entries in the buffer - 1"
417 */
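/* In other words, with the bit ranges quoted above, the number of entries is
 * reassembled below as ((depth << 21) | (height << 7) | width) + 1 and then
 * forced to 0 for SURFTYPE_NULL surfaces.
 */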
418 const unsigned width_bits = 7;
419 nir_def *width =
420 nir_iand_imm(b,
421 nir_ishr_imm(b,
422 nir_channel(b, type_sizes, width_start / 32),
423 width_start % 32),
424 (1u << width_bits) - 1);
425
426 const unsigned height_start = RENDER_SURFACE_STATE_Height_start(devinfo);
427 const unsigned height_bits = RENDER_SURFACE_STATE_Height_bits(devinfo);
428 nir_def *height =
429 nir_iand_imm(b,
430 nir_ishr_imm(b,
431 nir_channel(b, type_sizes, height_start / 32),
432 height_start % 32),
433 (1u << height_bits) - 1);
434
435 const unsigned depth_start = RENDER_SURFACE_STATE_Depth_start(devinfo);
436 const unsigned depth_bits = RENDER_SURFACE_STATE_Depth_bits(devinfo);
437 nir_def *depth =
438 nir_iand_imm(b,
439 nir_ishr_imm(b,
440 nir_channel(b, type_sizes, depth_start / 32),
441 depth_start % 32),
442 (1u << depth_bits) - 1);
443
444 nir_def *length = width;
445 length = nir_ior(b, length, nir_ishl_imm(b, height, width_bits));
446 length = nir_ior(b, length, nir_ishl_imm(b, depth, width_bits + height_bits));
447 length = nir_iadd_imm(b, length, 1);
448
449 /* Check the surface type, if it's SURFTYPE_NULL, set the length of the
450 * buffer to 0.
451 */
452 const unsigned type_start = RENDER_SURFACE_STATE_SurfaceType_start(devinfo);
453 const unsigned type_dw = type_start / 32;
454 nir_def *type =
455 nir_iand_imm(b,
456 nir_ishr_imm(b,
457 nir_channel(b, type_sizes, type_dw),
458 type_start % 32),
459 (1u << RENDER_SURFACE_STATE_SurfaceType_bits(devinfo)) - 1);
460
461 length = nir_bcsel(b,
462 nir_ieq_imm(b, type, 7 /* SURFTYPE_NULL */),
463 nir_imm_int(b, 0), length);
464
465 return nir_vec4(b, addr_ldw, addr_udw, length, nir_imm_int(b, 0));
466 }
467
468 static inline nir_def *
469 build_load_render_surface_state_address(nir_builder *b,
470 nir_def *desc_addr,
471 struct apply_pipeline_layout_state *state)
472 {
473 if (state->pdevice->isl_dev.buffer_length_in_aux_addr)
474 return build_optimized_load_render_surface_state_address(b, desc_addr, state);
475 return build_non_optimized_load_render_surface_state_address(b, desc_addr, state);
476 }
477
478 /* Load the depth of a 3D storage image.
479 *
480 * Either by reading the indirect descriptor value, or reading the value from
481 * RENDER_SURFACE_STATE.
482 *
483 * This is necessary for VK_EXT_image_sliced_view_of_3d.
484 */
485 static nir_def *
486 build_load_storage_3d_image_depth(nir_builder *b,
487 nir_def *desc_addr,
488 nir_def *resinfo_depth,
489 struct apply_pipeline_layout_state *state)
491 {
492 const struct intel_device_info *devinfo = &state->pdevice->info;
493
494 if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) {
495 return build_load_descriptor_mem(
496 b, desc_addr,
497 offsetof(struct anv_storage_image_descriptor, image_depth),
498 1, 32, state);
499 } else {
500 nir_def *data = build_load_descriptor_mem(
501 b, desc_addr,
502 RENDER_SURFACE_STATE_RenderTargetViewExtent_start(devinfo) / 8,
503 1, 32, state);
504 nir_def *depth =
505 nir_ushr_imm(
506 b, data,
507 RENDER_SURFACE_STATE_RenderTargetViewExtent_start(devinfo) % 32);
508 depth = nir_iand_imm(
509 b, depth,
510 (1u << RENDER_SURFACE_STATE_RenderTargetViewExtent_bits(devinfo)) - 1);
511 depth = nir_iadd_imm(b, depth, 1);
512
513 /* Return the minimum between the RESINFO value and the
514 * RENDER_SURFACE_STATE::RenderTargetViewExtent value.
515 *
516 * Both are expressed for the current view LOD, but in the case of a
517 * SURFTYPE_NULL, RESINFO will return the right value, while the -1
518 * value in RENDER_SURFACE_STATE should be ignored.
519 */
520 return nir_umin(b, resinfo_depth, depth);
521 }
522 }

523 /** Build a Vulkan resource index
524 *
525 * A "resource index" is the term used by our SPIR-V parser and the relevant
526 * NIR intrinsics for a reference into a descriptor set. It acts much like a
527 * deref in NIR except that it accesses opaque descriptors instead of memory.
528 *
529 * Coming out of SPIR-V, both the resource indices (in the form of
530 * vulkan_resource_[re]index intrinsics) and the memory derefs (in the form
531 * of nir_deref_instr) use the same vector component/bit size. The meaning
532 * of those values for memory derefs (nir_deref_instr) is given by the
533 * nir_address_format associated with the descriptor type. For resource
534 * indices, it's an encoding entirely internal to ANV which describes, in some
535 * sense, the address of the descriptor. Thanks to the NIR/SPIR-V rules, it
536 * must be packed into the same size SSA values as a memory address. For this
537 * reason, the actual encoding may depend both on the address format for
538 * memory derefs and the descriptor address format.
539 *
540 * The load_vulkan_descriptor intrinsic exists to provide a transition point
541 * between these two forms of derefs: descriptor and memory.
542 */
543 static nir_def *
544 build_res_index(nir_builder *b,
545 uint32_t set, uint32_t binding,
546 nir_def *array_index,
547 struct apply_pipeline_layout_state *state)
548 {
549 const struct anv_descriptor_set_binding_layout *bind_layout =
550 &state->layout->set[set].layout->binding[binding];
551
552 uint32_t array_size = bind_layout->array_size;
553
554 uint32_t set_idx;
555 switch (state->desc_addr_format) {
556 case nir_address_format_64bit_global_32bit_offset:
557 /* Descriptor set buffer accesses will go through A64 messages, so the
558 * index to get the descriptor set buffer address is located in the
559 * anv_push_constants::desc_surface_offsets and it's indexed by the set
560 * number.
561 */
562 set_idx = set;
563 break;
564
565 case nir_address_format_32bit_index_offset:
566 /* Descriptor set buffer accesses will go through the binding table. The
567 * offset is the entry in the binding table.
568 */
569 assert(state->set[set].desc_offset < MAX_BINDING_TABLE_SIZE);
570 set_idx = state->set[set].desc_offset;
571 break;
572
573 default:
574 unreachable("Unsupported address format");
575 }
576
577 assert(bind_layout->dynamic_offset_index < MAX_DYNAMIC_BUFFERS);
578 nir_def *dynamic_offset_index;
579 if (bind_layout->dynamic_offset_index >= 0) {
580 if (state->has_independent_sets) {
581 nir_def *dynamic_offset_start =
582 nir_load_desc_set_dynamic_index_intel(b, nir_imm_int(b, set));
583 dynamic_offset_index =
584 nir_iadd_imm(b, dynamic_offset_start,
585 bind_layout->dynamic_offset_index);
586 } else {
587 dynamic_offset_index =
588 nir_imm_int(b,
589 state->layout->set[set].dynamic_offset_start +
590 bind_layout->dynamic_offset_index);
591 }
592 } else {
593 dynamic_offset_index = nir_imm_int(b, 0xff); /* No dynamic offset */
594 }
595
596 const uint32_t desc_bti = state->set[set].binding[binding].surface_offset;
597 assert(bind_layout->descriptor_surface_stride % 8 == 0);
598 const uint32_t desc_stride = bind_layout->descriptor_surface_stride / 8;
599
600 nir_def *packed =
601 nir_ior_imm(b,
602 dynamic_offset_index,
603 (desc_stride << 24) |
604 (desc_bti << 16) |
605 (set_idx << 8));
606
608 return nir_vec4(b, packed,
609 nir_imm_int(b, bind_layout->descriptor_surface_offset),
610 nir_imm_int(b, array_size - 1),
611 array_index);
612 }
613
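/* Layout of the resource index vec4 built by build_res_index() and decoded
 * by unpack_res_index():
 *
 *   comp 0: packed dword
 *             bits  0..7  dynamic offset index (0xff = no dynamic offset)
 *             bits  8..15 descriptor set index or descriptor buffer BTI
 *             bits 16..23 binding table index of the binding
 *             bits 24..31 descriptor surface stride / 8
 *   comp 1: descriptor offset of the binding within the set
 *   comp 2: array size - 1 (used to clamp the array index)
 *   comp 3: array index
 */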
614 struct res_index_defs {
615 nir_def *bti_idx;
616 nir_def *set_idx;
617 nir_def *dyn_offset_base;
618 nir_def *desc_offset_base;
619 nir_def *array_index;
620 nir_def *desc_stride;
621 };
622
623 static struct res_index_defs
624 unpack_res_index(nir_builder *b, nir_def *index)
625 {
626 struct res_index_defs defs;
627
628 nir_def *packed = nir_channel(b, index, 0);
629 defs.desc_stride =
630 nir_imul_imm(b, nir_extract_u8(b, packed, nir_imm_int(b, 3)), 8);
631 defs.bti_idx = nir_extract_u8(b, packed, nir_imm_int(b, 2));
632 defs.set_idx = nir_extract_u8(b, packed, nir_imm_int(b, 1));
633 defs.dyn_offset_base = nir_extract_u8(b, packed, nir_imm_int(b, 0));
634
635 defs.desc_offset_base = nir_channel(b, index, 1);
636 defs.array_index = nir_umin(b, nir_channel(b, index, 2),
637 nir_channel(b, index, 3));
638
639 return defs;
640 }
641
642 /** Whether a surface is accessed through the bindless surface state heap */
643 static bool
644 is_binding_bindless(unsigned set, unsigned binding, bool sampler,
645 const struct apply_pipeline_layout_state *state)
646 {
647 /* Has a binding table entry been allocated for this binding? */
648 if (sampler &&
649 state->set[set].binding[binding].sampler_offset != BINDLESS_OFFSET)
650 return false;
651 if (!sampler &&
652 state->set[set].binding[binding].surface_offset != BINDLESS_OFFSET)
653 return false;
654
655 return true;
656 }
657
658 /** Adjust a Vulkan resource index
659 *
660 * This is the equivalent of nir_deref_type_ptr_as_array for resource indices.
661 * For array descriptors, it allows us to adjust the array index. Thanks to
662 * variable pointers, we cannot always fold this re-index operation into the
663 * vulkan_resource_index intrinsic and we have to do it based on nothing but
664 * the address format.
665 */
666 static nir_def *
667 build_res_reindex(nir_builder *b, nir_def *orig, nir_def *delta)
668 {
669 return nir_vec4(b, nir_channel(b, orig, 0),
670 nir_channel(b, orig, 1),
671 nir_channel(b, orig, 2),
672 nir_iadd(b, nir_channel(b, orig, 3), delta));
673 }
674
675 /** Get the address for a descriptor given its resource index
676 *
677 * Because of the re-indexing operations, we can't bounds check descriptor
678 * array access until we have the final index. That means we end up doing the
679 * bounds check here, if needed. See unpack_res_index() for more details.
680 *
681 * This function takes a desc_type which is used to determine whether the
682 * array index contributes to the descriptor offset (inline uniform blocks
683 * are not arrayed and have no stride).
684 */
685 static nir_def *
686 build_desc_addr_for_res_index(nir_builder *b,
687 const VkDescriptorType desc_type,
688 nir_def *index, nir_address_format addr_format,
689 struct apply_pipeline_layout_state *state)
690 {
691 struct res_index_defs res = unpack_res_index(b, index);
692
693 nir_def *desc_offset = res.desc_offset_base;
694 if (desc_type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
695 /* Compute the actual descriptor offset. For inline uniform blocks,
696 * the array index is ignored as they are only allowed to be a single
697 * descriptor (not an array) and there is no concept of a "stride".
698 *
699 */
700 desc_offset =
701 nir_iadd(b, desc_offset, nir_imul(b, res.array_index, res.desc_stride));
702 }
703
704 switch (addr_format) {
705 case nir_address_format_64bit_global_32bit_offset:
706 case nir_address_format_64bit_bounded_global: {
707 switch (state->desc_addr_format) {
708 case nir_address_format_64bit_global_32bit_offset: {
709 nir_def *base_addr =
710 nir_load_desc_set_address_intel(b, res.set_idx);
711 return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
712 nir_unpack_64_2x32_split_y(b, base_addr),
713 nir_imm_int(b, UINT32_MAX),
714 desc_offset);
715 }
716
717 case nir_address_format_32bit_index_offset:
718 return nir_vec2(b, res.set_idx, desc_offset);
719
720 default:
721 unreachable("Unhandled address format");
722 }
723 }
724
725 case nir_address_format_32bit_index_offset:
726 assert(desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK);
727 assert(state->desc_addr_format == nir_address_format_32bit_index_offset);
728 return nir_vec2(b, res.set_idx, desc_offset);
729
730 default:
731 unreachable("Unhandled address format");
732 }
733 }
734
735 static nir_def *
736 build_desc_addr_for_binding(nir_builder *b,
737 unsigned set, unsigned binding,
738 nir_def *array_index,
739 const struct apply_pipeline_layout_state *state)
740 {
741 const struct anv_descriptor_set_binding_layout *bind_layout =
742 &state->layout->set[set].layout->binding[binding];
743
744 switch (state->desc_addr_format) {
745 case nir_address_format_64bit_global_32bit_offset:
746 case nir_address_format_64bit_bounded_global: {
747 nir_def *set_addr = nir_load_desc_set_address_intel(b, nir_imm_int(b, set));
748 nir_def *desc_offset =
749 nir_iadd_imm(b,
750 nir_imul_imm(b,
751 array_index,
752 bind_layout->descriptor_surface_stride),
753 bind_layout->descriptor_surface_offset);
754
755 return nir_vec4(b, nir_unpack_64_2x32_split_x(b, set_addr),
756 nir_unpack_64_2x32_split_y(b, set_addr),
757 nir_imm_int(b, UINT32_MAX),
758 desc_offset);
759 }
760
761 case nir_address_format_32bit_index_offset:
762 return nir_vec2(b,
763 nir_imm_int(b, state->set[set].desc_offset),
764 nir_iadd_imm(b,
765 nir_imul_imm(b,
766 array_index,
767 bind_layout->descriptor_surface_stride),
768 bind_layout->descriptor_surface_offset));
769
770 default:
771 unreachable("Unhandled address format");
772 }
773 }
774
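/* With the direct descriptor layout, samplers have their own offset/stride
 * within the descriptor set (descriptor_sampler_offset/stride); in every
 * other case the surface offset/stride is used.
 */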
775 static unsigned
776 binding_descriptor_offset(const struct apply_pipeline_layout_state *state,
777 const struct anv_descriptor_set_binding_layout *bind_layout,
778 bool sampler)
779 {
780 if (sampler &&
781 state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT)
782 return bind_layout->descriptor_sampler_offset;
783
784 return bind_layout->descriptor_surface_offset;
785 }
786
787 static unsigned
788 binding_descriptor_stride(const struct apply_pipeline_layout_state *state,
789 const struct anv_descriptor_set_binding_layout *bind_layout,
790 bool sampler)
791 {
792 if (sampler &&
793 state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT)
794 return bind_layout->descriptor_sampler_stride;
795
796 return bind_layout->descriptor_surface_stride;
797 }
798
799 static nir_def *
800 build_surface_index_for_binding(nir_builder *b,
801 unsigned set, unsigned binding,
802 nir_def *array_index,
803 unsigned plane,
804 bool non_uniform,
805 const struct apply_pipeline_layout_state *state)
806 {
807 const struct anv_descriptor_set_binding_layout *bind_layout =
808 &state->layout->set[set].layout->binding[binding];
809 const unsigned descriptor_offset =
810 binding_descriptor_offset(state, bind_layout, false /* sampler */);
811 const unsigned descriptor_stride =
812 binding_descriptor_stride(state, bind_layout, false /* sampler */);
813 const bool is_bindless =
814 is_binding_bindless(set, binding, false /* sampler */, state);
815
816 nir_def *set_offset, *surface_index;
817 if (is_bindless) {
818 if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) {
819 set_offset = nir_imm_int(b, 0xdeaddead);
820
821 nir_def *desc_addr =
822 build_desc_addr_for_binding(b, set, binding, array_index, state);
823
824 surface_index =
825 build_load_descriptor_mem(b, desc_addr, 0, 1, 32, state);
826 } else {
827 set_offset =
828 nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0),
829 .base = offsetof(struct anv_push_constants,
830 desc_surface_offsets[set]),
831 .range = sizeof_field(struct anv_push_constants,
832 desc_surface_offsets[set]));
833
834 /* With bindless, indexes are offsets in the descriptor buffer */
835 surface_index =
836 nir_iadd_imm(b,
837 nir_imul_imm(b, array_index, descriptor_stride),
838 descriptor_offset);
839 if (plane != 0) {
840 assert(plane < bind_layout->max_plane_count);
841 surface_index = nir_iadd_imm(b, surface_index,
842 plane * (descriptor_stride /
843 bind_layout->max_plane_count));
844 }
845
846 assert(descriptor_offset % 64 == 0);
847 assert(descriptor_stride % 64 == 0);
848 }
849 } else {
850 /* Unused */
851 set_offset = nir_imm_int(b, 0xdeaddead);
852
853 unsigned bti_stride = bti_multiplier(state, set, binding);
854 assert(bti_stride >= 1);
855
856 /* For Ycbcr descriptors, add the plane offset */
857 unsigned element_index = plane;
858
859 /* With the binding table, it's an index in the table */
860 surface_index =
861 nir_iadd_imm(b, nir_imul_imm(b, array_index, bti_stride),
862 state->set[set].binding[binding].surface_offset + element_index);
863 assert(state->set[set].binding[binding].surface_offset < MAX_BINDING_TABLE_SIZE);
864 }
865
866 return nir_resource_intel(b,
867 set_offset,
868 surface_index,
869 array_index,
870 .desc_set = set,
871 .binding = binding,
872 .resource_block_intel = state->set[set].binding[binding].push_block,
873 .resource_access_intel =
874 (is_bindless ? nir_resource_intel_bindless : 0) |
875 (non_uniform ? nir_resource_intel_non_uniform : 0) |
876 ((state->set[set].binding[binding].properties &
877 BINDING_PROPERTY_PUSHABLE) ? nir_resource_intel_pushable : 0));
878 }
879
880 static nir_def *
881 build_sampler_handle_for_binding(nir_builder *b,
882 unsigned set, unsigned binding,
883 nir_def *array_index,
884 unsigned plane,
885 bool non_uniform,
886 const struct apply_pipeline_layout_state *state)
887 {
888 const struct anv_descriptor_set_binding_layout *bind_layout =
889 &state->layout->set[set].layout->binding[binding];
890 const unsigned descriptor_offset =
891 binding_descriptor_offset(state, bind_layout, true /* sampler */);
892 const unsigned descriptor_stride =
893 binding_descriptor_stride(state, bind_layout, true /* sampler */);
894 const bool is_bindless =
895 is_binding_bindless(set, binding, true /* sampler */, state);
896 nir_def *set_offset, *sampler_index;
897
898 if (is_bindless) {
899 if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) {
900 set_offset = nir_imm_int(b, 0xdeaddead);
901
902 nir_def *desc_addr =
903 build_desc_addr_for_binding(b, set, binding, array_index, state);
904
905 /* This is an anv_sampled_image_descriptor; the sampler handle is always
906 * in component 1.
907 */
908 nir_def *desc_data =
909 build_load_descriptor_mem(b, desc_addr, 0, 2, 32, state);
910
911 sampler_index = nir_channel(b, desc_data, 1);
912 } else {
913 set_offset =
914 nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0),
915 .base = offsetof(struct anv_push_constants,
916 desc_sampler_offsets[set]),
917 .range = sizeof_field(struct anv_push_constants,
918 desc_sampler_offsets[set]));
919
920 uint32_t base_offset = descriptor_offset;
921
922 /* The SAMPLER_STATE can only be located at a 64 byte offset in the
923 * combined image/sampler case. Combined image/sampler is not supported
924 * for use with mutable descriptor types.
925 */
926 if (bind_layout->data & ANV_DESCRIPTOR_SURFACE_SAMPLER)
927 base_offset += ANV_SURFACE_STATE_SIZE;
928
929 if (plane != 0) {
930 assert(plane < bind_layout->max_plane_count);
931 base_offset += plane * (descriptor_stride /
932 bind_layout->max_plane_count);
933 }
934
935 sampler_index =
936 nir_iadd_imm(b,
937 nir_imul_imm(b, array_index, descriptor_stride),
938 base_offset);
939 }
940 } else {
941 /* Unused */
942 set_offset = nir_imm_int(b, 0xdeaddead);
943
944 sampler_index =
945 nir_iadd_imm(b, array_index,
946 state->set[set].binding[binding].sampler_offset + plane);
947 }
948
949 return nir_resource_intel(b, set_offset, sampler_index, array_index,
950 .desc_set = set,
951 .binding = binding,
952 .resource_access_intel =
953 (is_bindless ? nir_resource_intel_bindless : 0) |
954 (non_uniform ? nir_resource_intel_non_uniform : 0) |
955 nir_resource_intel_sampler);
956 }
957
958 static nir_def *
959 build_buffer_dynamic_offset_for_res_index(nir_builder *b,
960 nir_def *dyn_offset_base,
961 nir_def *array_index,
962 struct apply_pipeline_layout_state *state)
963 {
964 nir_def *dyn_offset_idx = nir_iadd(b, dyn_offset_base, array_index);
965
966 nir_def *dyn_load =
967 nir_load_push_constant(b, 1, 32, nir_imul_imm(b, dyn_offset_idx, 4),
968 .base = offsetof(struct anv_push_constants, dynamic_offsets),
969 .range = sizeof_field(struct anv_push_constants, dynamic_offsets));
970
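/* A dyn_offset_base of 0xff marks a binding without a dynamic offset (see
 * build_res_index()), in which case no offset is added.
 */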
971 return nir_bcsel(b, nir_ieq_imm(b, dyn_offset_base, 0xff),
972 nir_imm_int(b, 0), dyn_load);
973 }
974
975 /** Convert a Vulkan resource index into a buffer address
976 *
977 * In some cases, this does a memory load from the descriptor set and, in
978 * others, it simply converts from one form to another.
979 *
980 * See build_res_index for details about each resource index format.
981 */
982 static nir_def *
983 build_indirect_buffer_addr_for_res_index(nir_builder *b,
984 const VkDescriptorType desc_type,
985 nir_def *res_index,
986 nir_address_format addr_format,
987 struct apply_pipeline_layout_state *state)
988 {
989 struct res_index_defs res = unpack_res_index(b, res_index);
990
991 if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
992 assert(addr_format == state->desc_addr_format);
993 return build_desc_addr_for_res_index(b, desc_type, res_index,
994 addr_format, state);
995 } else if (addr_format == nir_address_format_32bit_index_offset) {
996 return nir_vec2(b, nir_iadd(b, res.bti_idx, res.array_index),
997 nir_imm_int(b, 0));
998 }
999
1000 nir_def *desc_addr =
1001 build_desc_addr_for_res_index(b, desc_type, res_index,
1002 addr_format, state);
1003
1004 nir_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 4, 32, state);
1005
1006 if (state->has_dynamic_buffers) {
1007 /* This shader has dynamic offsets and we have no way of knowing
1008 * (save from the dynamic offset base index) if this buffer has a
1009 * dynamic offset.
1010 */
1011 nir_def *dyn_offset_idx =
1012 nir_iadd(b, res.dyn_offset_base, res.array_index);
1013
1014 nir_def *dyn_load =
1015 nir_load_push_constant(b, 1, 32, nir_imul_imm(b, dyn_offset_idx, 4),
1016 .base = offsetof(struct anv_push_constants, dynamic_offsets),
1017 .range = MAX_DYNAMIC_BUFFERS * 4);
1018
1019 nir_def *dynamic_offset =
1020 nir_bcsel(b, nir_ieq_imm(b, res.dyn_offset_base, 0xff),
1021 nir_imm_int(b, 0), dyn_load);
1022
1023 /* The dynamic offset gets added to the base pointer so that we
1024 * have a sliding window range.
1025 */
1026 nir_def *base_ptr =
1027 nir_pack_64_2x32(b, nir_trim_vector(b, desc, 2));
1028 base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
1029 desc = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
1030 nir_unpack_64_2x32_split_y(b, base_ptr),
1031 nir_channel(b, desc, 2),
1032 nir_channel(b, desc, 3));
1033 }
1034
1035 /* The last element of the vec4 is always zero.
1036 *
1037 * See also struct anv_address_range_descriptor
1038 */
1039 return nir_vec4(b, nir_channel(b, desc, 0),
1040 nir_channel(b, desc, 1),
1041 nir_channel(b, desc, 2),
1042 nir_imm_int(b, 0));
1043 }
1044
1045 static nir_def *
1046 build_direct_buffer_addr_for_res_index(nir_builder *b,
1047 const VkDescriptorType desc_type,
1048 nir_def *res_index,
1049 nir_address_format addr_format,
1050 struct apply_pipeline_layout_state *state)
1051 {
1052 if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
1053 assert(addr_format == state->desc_addr_format);
1054 return build_desc_addr_for_res_index(b, desc_type, res_index,
1055 addr_format, state);
1056 } else if (addr_format == nir_address_format_32bit_index_offset) {
1057 struct res_index_defs res = unpack_res_index(b, res_index);
1058
1059 return nir_vec2(b, nir_iadd(b, res.desc_offset_base,
1060 nir_imul(b, res.array_index, res.desc_stride)),
1061 nir_imm_int(b, 0));
1062 }
1063
1064 nir_def *desc_addr =
1065 build_desc_addr_for_res_index(b, desc_type, res_index,
1066 addr_format, state);
1067
1068 nir_def *addr =
1069 build_load_render_surface_state_address(b, desc_addr, state);
1070
1071 if (state->has_dynamic_buffers) {
1072 struct res_index_defs res = unpack_res_index(b, res_index);
1073
1074 /* This shader has dynamic offsets and we have no way of knowing (save
1075 * from the dynamic offset base index) if this buffer has a dynamic
1076 * offset.
1077 */
1078 nir_def *dynamic_offset =
1079 build_buffer_dynamic_offset_for_res_index(
1080 b, res.dyn_offset_base, res.array_index, state);
1081
1082 /* The dynamic offset gets added to the base pointer so that we
1083 * have a sliding window range.
1084 */
1085 nir_def *base_ptr =
1086 nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
1087 base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
1088 addr = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
1089 nir_unpack_64_2x32_split_y(b, base_ptr),
1090 nir_channel(b, addr, 2),
1091 nir_channel(b, addr, 3));
1092 }
1093
1094 /* The last element of the vec4 is always zero.
1095 *
1096 * See also struct anv_address_range_descriptor
1097 */
1098 return nir_vec4(b, nir_channel(b, addr, 0),
1099 nir_channel(b, addr, 1),
1100 nir_channel(b, addr, 2),
1101 nir_imm_int(b, 0));
1102 }
1103
1104 static nir_def *
1105 build_buffer_addr_for_res_index(nir_builder *b,
1106 const VkDescriptorType desc_type,
1107 nir_def *res_index,
1108 nir_address_format addr_format,
1109 struct apply_pipeline_layout_state *state)
1110 {
1111 if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT)
1112 return build_indirect_buffer_addr_for_res_index(b, desc_type, res_index, addr_format, state);
1113 else
1114 return build_direct_buffer_addr_for_res_index(b, desc_type, res_index, addr_format, state);
1115 }
1116
1117 static nir_def *
1118 build_buffer_addr_for_binding(nir_builder *b,
1119 const VkDescriptorType desc_type,
1120 unsigned set,
1121 unsigned binding,
1122 nir_def *res_index,
1123 nir_address_format addr_format,
1124 struct apply_pipeline_layout_state *state)
1125 {
1126 if (addr_format != nir_address_format_32bit_index_offset)
1127 return build_buffer_addr_for_res_index(b, desc_type, res_index, addr_format, state);
1128
1129 if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
1130 const struct anv_descriptor_set_binding_layout *bind_layout =
1131 &state->layout->set[set].layout->binding[binding];
1132 return nir_vec2(b,
1133 nir_imm_int(b, state->set[set].desc_offset),
1134 nir_imm_int(b, bind_layout->descriptor_surface_offset));
1135 }
1136
1137 struct res_index_defs res = unpack_res_index(b, res_index);
1138
1139 return nir_vec2(b,
1140 build_surface_index_for_binding(b, set, binding, res.array_index,
1141 0 /* plane */,
1142 false /* non_uniform */,
1143 state),
1144 nir_imm_int(b, 0));
1145 }
1146
1147 /** Loads descriptor memory for a variable-based deref chain
1148 *
1149 * The deref chain has to terminate at a variable with a descriptor_set and
1150 * binding set. This is used for images, textures, and samplers.
1151 */
1152 static nir_def *
1153 build_load_var_deref_surface_handle(nir_builder *b, nir_deref_instr *deref,
1154 bool non_uniform,
1155 bool *out_is_bindless,
1156 struct apply_pipeline_layout_state *state)
1157 {
1158 nir_variable *var = nir_deref_instr_get_variable(deref);
1159
1160 const uint32_t set = var->data.descriptor_set;
1161 const uint32_t binding = var->data.binding;
1162
1163 *out_is_bindless =
1164 is_binding_bindless(set, binding, false /* sampler */, state);
1165
1166 nir_def *array_index;
1167 if (deref->deref_type != nir_deref_type_var) {
1168 assert(deref->deref_type == nir_deref_type_array);
1169 assert(nir_deref_instr_parent(deref)->deref_type == nir_deref_type_var);
1170 array_index = deref->arr.index.ssa;
1171 } else {
1172 array_index = nir_imm_int(b, 0);
1173 }
1174
1175 return build_surface_index_for_binding(b, set, binding, array_index,
1176 0 /* plane */, non_uniform, state);
1177 }
1178
1179 /** A recursive form of build_res_index()
1180 *
1181 * This recursively walks a resource [re]index chain and builds the resource
1182 * index. It places the new code with the resource [re]index operation in the
1183 * hopes of better CSE. This means the cursor is not where you left it when
1184 * this function returns.
1185 */
1186 static nir_def *
1187 build_res_index_for_chain(nir_builder *b, nir_intrinsic_instr *intrin,
1188 nir_address_format addr_format,
1189 uint32_t *set, uint32_t *binding,
1190 struct apply_pipeline_layout_state *state)
1191 {
1192 if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) {
1193 b->cursor = nir_before_instr(&intrin->instr);
1194 *set = nir_intrinsic_desc_set(intrin);
1195 *binding = nir_intrinsic_binding(intrin);
1196 return build_res_index(b, *set, *binding, intrin->src[0].ssa, state);
1197 } else {
1198 assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex);
1199 nir_intrinsic_instr *parent = nir_src_as_intrinsic(intrin->src[0]);
1200 nir_def *index =
1201 build_res_index_for_chain(b, parent, addr_format,
1202 set, binding, state);
1203
1204 b->cursor = nir_before_instr(&intrin->instr);
1205
1206 return build_res_reindex(b, index, intrin->src[1].ssa);
1207 }
1208 }
1209
1210 /** Builds a buffer address for a given vulkan [re]index intrinsic
1211 *
1212 * The cursor is not where you left it when this function returns.
1213 */
1214 static nir_def *
1215 build_buffer_addr_for_idx_intrin(nir_builder *b,
1216 nir_intrinsic_instr *idx_intrin,
1217 nir_address_format addr_format,
1218 struct apply_pipeline_layout_state *state)
1219 {
1220 uint32_t set = UINT32_MAX, binding = UINT32_MAX;
1221 nir_def *res_index =
1222 build_res_index_for_chain(b, idx_intrin, addr_format,
1223 &set, &binding, state);
1224
1225 const struct anv_descriptor_set_binding_layout *bind_layout =
1226 &state->layout->set[set].layout->binding[binding];
1227
1228 return build_buffer_addr_for_binding(b, bind_layout->type,
1229 set, binding, res_index,
1230 addr_format, state);
1231 }
1232
1233 /** Builds a buffer address for deref chain
1234 *
1235 * This assumes that you can chase the chain all the way back to the original
1236 * vulkan_resource_index intrinsic.
1237 *
1238 * The cursor is not where you left it when this function returns.
1239 */
1240 static nir_def *
1241 build_buffer_addr_for_deref(nir_builder *b, nir_deref_instr *deref,
1242 nir_address_format addr_format,
1243 struct apply_pipeline_layout_state *state)
1244 {
1245 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1246 if (parent) {
1247 nir_def *addr =
1248 build_buffer_addr_for_deref(b, parent, addr_format, state);
1249
1250 b->cursor = nir_before_instr(&deref->instr);
1251 return nir_explicit_io_address_from_deref(b, deref, addr, addr_format);
1252 }
1253
1254 nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
1255 assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);
1256
1257 nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);
1258
1259 b->cursor = nir_before_instr(&deref->instr);
1260
1261 return build_buffer_addr_for_idx_intrin(b, idx_intrin, addr_format, state);
1262 }
1263
1264 static bool
1265 try_lower_direct_buffer_intrinsic(nir_builder *b,
1266 nir_intrinsic_instr *intrin, bool is_atomic,
1267 struct apply_pipeline_layout_state *state)
1268 {
1269 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1270 if (!nir_deref_mode_is_one_of(deref, nir_var_mem_ubo | nir_var_mem_ssbo))
1271 return false;
1272
1273 nir_intrinsic_instr *desc = nir_deref_find_descriptor(deref, state);
1274 if (desc == NULL) {
1275 /* We should always be able to find the descriptor for UBO access. */
1276 assert(nir_deref_mode_is_one_of(deref, nir_var_mem_ssbo));
1277 return false;
1278 }
1279
1280 const unsigned set = nir_intrinsic_desc_set(desc);
1281 const unsigned binding = nir_intrinsic_binding(desc);
1282
1283 const struct anv_descriptor_set_binding_layout *bind_layout =
1284 &state->layout->set[set].layout->binding[binding];
1285
1286 nir_address_format addr_format = descriptor_address_format(desc, state);
1287
1288 /* Although we could lower non-uniform binding table accesses with
1289 * nir_opt_non_uniform_access, we might as well use an A64 message and
1290 * avoid the loops inserted by that lowering pass.
1291 */
1292 if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM)
1293 return false;
1294
1295 if (nir_deref_mode_is(deref, nir_var_mem_ssbo)) {
1296 /* 64-bit atomics only support A64 messages so we can't lower them to
1297 * the index+offset model.
1298 */
1299 if (is_atomic && intrin->def.bit_size == 64 &&
1300 !state->pdevice->info.has_lsc)
1301 return false;
1302
1303 /* If we don't have a BTI for this binding and we're using indirect
1304 * descriptors, we'll use A64 messages. This is handled in the main
1305 * lowering path.
1306 */
1307 if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT &&
1308 !descriptor_has_bti(desc, state))
1309 return false;
1310
1311 /* Rewrite to 32bit_index_offset whenever we can */
1312 addr_format = nir_address_format_32bit_index_offset;
1313 } else {
1314 assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
1315
1316 /* If we don't have a BTI for this binding and we're using indirect
1317 * descriptors, we'll use A64 messages. This is handled in the main
1318 * lowering path.
1319 *
1320 * We make an exception for uniform blocks which are built from the
1321 * descriptor set base address + offset. There is no indirect data to
1322 * fetch.
1323 */
1324 if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT &&
1325 bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK &&
1326 !descriptor_has_bti(desc, state))
1327 return false;
1328
1329 /* If this is an inline uniform and the shader stage is bindless, we
1330 * can't switch to 32bit_index_offset.
1331 */
1332 if (bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK ||
1333 !brw_shader_stage_requires_bindless_resources(b->shader->info.stage))
1334 addr_format = nir_address_format_32bit_index_offset;
1335 }
1336
1337 /* If a dynamic buffer has not been assigned a binding table entry, we need to
1338 * bail here.
1339 */
1340 if ((bind_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
1341 bind_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) &&
1342 !descriptor_has_bti(desc, state))
1343 return false;
1344
1345 nir_def *addr =
1346 build_buffer_addr_for_deref(b, deref, addr_format, state);
1347
1348 b->cursor = nir_before_instr(&intrin->instr);
1349 nir_lower_explicit_io_instr(b, intrin, addr, addr_format);
1350
1351 return true;
1352 }
1353
1354 static bool
1355 lower_load_accel_struct_desc(nir_builder *b,
1356 nir_intrinsic_instr *load_desc,
1357 struct apply_pipeline_layout_state *state)
1358 {
1359 assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);
1360
1361 nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);
1362
1363 /* It doesn't really matter what address format we choose as
1364 * everything will constant-fold nicely. Choose one that uses the
1365 * actual descriptor buffer.
1366 */
1367 const nir_address_format addr_format =
1368 nir_address_format_64bit_bounded_global;
1369
1370 uint32_t set = UINT32_MAX, binding = UINT32_MAX;
1371 nir_def *res_index =
1372 build_res_index_for_chain(b, idx_intrin, addr_format,
1373 &set, &binding, state);
1374
1375 b->cursor = nir_before_instr(&load_desc->instr);
1376
1377 struct res_index_defs res = unpack_res_index(b, res_index);
1378 nir_def *desc_addr =
1379 build_desc_addr_for_binding(b, set, binding, res.array_index, state);
1380
1381 /* Acceleration structure descriptors are always uint64_t */
1382 nir_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 1, 64, state);
1383
1384 assert(load_desc->def.bit_size == 64);
1385 assert(load_desc->def.num_components == 1);
1386 nir_def_rewrite_uses(&load_desc->def, desc);
1387 nir_instr_remove(&load_desc->instr);
1388
1389 return true;
1390 }
1391
1392 static bool
1393 lower_direct_buffer_instr(nir_builder *b, nir_instr *instr, void *_state)
1394 {
1395 struct apply_pipeline_layout_state *state = _state;
1396
1397 if (instr->type != nir_instr_type_intrinsic)
1398 return false;
1399
1400 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1401 switch (intrin->intrinsic) {
1402 case nir_intrinsic_load_deref:
1403 case nir_intrinsic_store_deref:
1404 return try_lower_direct_buffer_intrinsic(b, intrin, false, state);
1405
1406 case nir_intrinsic_deref_atomic:
1407 case nir_intrinsic_deref_atomic_swap:
1408 return try_lower_direct_buffer_intrinsic(b, intrin, true, state);
1409
1410 case nir_intrinsic_get_ssbo_size: {
1411 /* The get_ssbo_size intrinsic always just takes an
1412 * index/reindex intrinsic.
1413 */
1414 nir_intrinsic_instr *idx_intrin =
1415 find_descriptor_for_index_src(intrin->src[0], state);
1416 if (idx_intrin == NULL)
1417 return false;
1418
1419 /* We just checked that this is a BTI descriptor */
1420 const nir_address_format addr_format =
1421 nir_address_format_32bit_index_offset;
1422
1423 b->cursor = nir_before_instr(&intrin->instr);
1424
1425 uint32_t set = UINT32_MAX, binding = UINT32_MAX;
1426 nir_def *res_index =
1427 build_res_index_for_chain(b, idx_intrin, addr_format,
1428 &set, &binding, state);
1429
1430 bool non_uniform = nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM;
1431
1432 nir_def *surface_index =
1433 build_surface_index_for_binding(b, set, binding,
1434 nir_channel(b, res_index, 3),
1435 0 /* plane */,
1436 non_uniform,
1437 state);
1438
1439 nir_src_rewrite(&intrin->src[0], surface_index);
1440 _mesa_set_add(state->lowered_instrs, intrin);
1441 return true;
1442 }
1443
1444 case nir_intrinsic_load_vulkan_descriptor:
1445 if (nir_intrinsic_desc_type(intrin) ==
1446 VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
1447 return lower_load_accel_struct_desc(b, intrin, state);
1448 return false;
1449
1450 default:
1451 return false;
1452 }
1453 }
1454
1455 static bool
1456 lower_res_index_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
1457 struct apply_pipeline_layout_state *state)
1458 {
1459 b->cursor = nir_before_instr(&intrin->instr);
1460
1461 nir_def *index =
1462 build_res_index(b, nir_intrinsic_desc_set(intrin),
1463 nir_intrinsic_binding(intrin),
1464 intrin->src[0].ssa,
1465 state);
1466
1467 assert(intrin->def.bit_size == index->bit_size);
1468 assert(intrin->def.num_components == index->num_components);
1469 nir_def_rewrite_uses(&intrin->def, index);
1470 nir_instr_remove(&intrin->instr);
1471
1472 return true;
1473 }
1474
1475 static bool
1476 lower_res_reindex_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
1477 struct apply_pipeline_layout_state *state)
1478 {
1479 b->cursor = nir_before_instr(&intrin->instr);
1480
1481 nir_def *index =
1482 build_res_reindex(b, intrin->src[0].ssa,
1483 intrin->src[1].ssa);
1484
1485 assert(intrin->def.bit_size == index->bit_size);
1486 assert(intrin->def.num_components == index->num_components);
1487 nir_def_rewrite_uses(&intrin->def, index);
1488 nir_instr_remove(&intrin->instr);
1489
1490 return true;
1491 }
1492
1493 static bool
1494 lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
1495 struct apply_pipeline_layout_state *state)
1496 {
1497 b->cursor = nir_before_instr(&intrin->instr);
1498
1499 const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
1500 nir_address_format addr_format = addr_format_for_desc_type(desc_type, state);
1501
1502 nir_def *desc =
1503 build_buffer_addr_for_res_index(b,
1504 desc_type, intrin->src[0].ssa,
1505 addr_format, state);
1506
1507 assert(intrin->def.bit_size == desc->bit_size);
1508 assert(intrin->def.num_components == desc->num_components);
1509 nir_def_rewrite_uses(&intrin->def, desc);
1510 nir_instr_remove(&intrin->instr);
1511
1512 return true;
1513 }
1514
1515 static bool
1516 lower_get_ssbo_size(nir_builder *b, nir_intrinsic_instr *intrin,
1517 struct apply_pipeline_layout_state *state)
1518 {
1519 if (_mesa_set_search(state->lowered_instrs, intrin))
1520 return false;
1521
1522 b->cursor = nir_before_instr(&intrin->instr);
1523
1524 const nir_address_format addr_format =
1525 nir_address_format_64bit_bounded_global;
1526
1527 nir_def *desc_addr =
1528 nir_build_addr_iadd_imm(
1529 b,
1530 build_desc_addr_for_res_index(b,
1531 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1532 intrin->src[0].ssa,
1533 addr_format, state),
1534 addr_format,
1535 nir_var_mem_ssbo,
1536 state->pdevice->isl_dev.ss.size);
1537
1538 nir_def *desc_range;
1539 if (state->layout->type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_INDIRECT) {
1540 /* Load the anv_address_range_descriptor */
1541 desc_range =
1542 build_load_descriptor_mem(b, desc_addr, 0, 4, 32, state);
1543 } else {
1544 /* Build a vec4 similar to anv_address_range_descriptor using the
1545 * RENDER_SURFACE_STATE.
1546 */
1547 desc_range =
1548 build_load_render_surface_state_address(b, desc_addr, state);
1549 }
1550
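/* In either form, component 2 of the vec4 holds the buffer size. */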
1551 nir_def *size = nir_channel(b, desc_range, 2);
1552 nir_def_rewrite_uses(&intrin->def, size);
1553 nir_instr_remove(&intrin->instr);
1554
1555 return true;
1556 }
1557
1558 static bool
1559 lower_image_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
1560 struct apply_pipeline_layout_state *state)
1561 {
1562 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1563
1564 b->cursor = nir_before_instr(&intrin->instr);
1565
1566 bool non_uniform = nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM;
1567 bool is_bindless;
1568 nir_def *handle =
1569 build_load_var_deref_surface_handle(b, deref, non_uniform,
1570 &is_bindless, state);
1571 nir_rewrite_image_intrinsic(intrin, handle, is_bindless);
1572
1573 return true;
1574 }
1575
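/* image_deref_size on a 3D storage image is lowered like any other image
 * intrinsic, except that the Z (depth) component of the result is then
 * replaced with a value read back from the image descriptor.
 */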
1576 static bool
1577 lower_image_size_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
1578 struct apply_pipeline_layout_state *state)
1579 {
1580 if (nir_intrinsic_image_dim(intrin) != GLSL_SAMPLER_DIM_3D)
1581 return lower_image_intrinsic(b, intrin, state);
1582
1583 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1584
1585 b->cursor = nir_before_instr(&intrin->instr);
1586
1587 bool non_uniform = nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM;
1588 bool is_bindless;
1589 nir_def *handle =
1590 build_load_var_deref_surface_handle(b, deref, non_uniform,
1591 &is_bindless, state);
1592 nir_rewrite_image_intrinsic(intrin, handle, is_bindless);
1593
1594 nir_variable *var = nir_deref_instr_get_variable(deref);
1595 const uint32_t set = var->data.descriptor_set;
1596 const uint32_t binding = var->data.binding;
1597
1598 nir_def *array_index;
1599 if (deref->deref_type != nir_deref_type_var) {
1600 assert(deref->deref_type == nir_deref_type_array);
1601 assert(nir_deref_instr_parent(deref)->deref_type == nir_deref_type_var);
1602 array_index = deref->arr.index.ssa;
1603 } else {
1604 array_index = nir_imm_int(b, 0);
1605 }
1606
1607 nir_def *desc_addr = build_desc_addr_for_binding(
1608 b, set, binding, array_index, state);
1609
1610 b->cursor = nir_after_instr(&intrin->instr);
1611
1612 nir_def *image_depth =
1613 build_load_storage_3d_image_depth(b, desc_addr,
1614 nir_channel(b, &intrin->def, 2),
1615 state);
1616
1617 nir_def *comps[4] = {};
1618 for (unsigned c = 0; c < intrin->def.num_components; c++)
1619 comps[c] = c == 2 ? image_depth : nir_channel(b, &intrin->def, c);
1620
1621 nir_def *vec = nir_vec(b, comps, intrin->def.num_components);
1622 nir_def_rewrite_uses_after(&intrin->def, vec, vec->parent_instr);
1623
1624 return true;
1625 }
1626
1627 static bool
1628 lower_load_constant(nir_builder *b, nir_intrinsic_instr *intrin,
1629 struct apply_pipeline_layout_state *state)
1630 {
1631 b->cursor = nir_instr_remove(&intrin->instr);
1632
1633 /* Any constant-offset load_constant instructions should have been removed
1634 * by constant folding.
1635 */
1636 assert(!nir_src_is_const(intrin->src[0]));
1637 nir_def *offset = nir_iadd_imm(b, intrin->src[0].ssa,
1638 nir_intrinsic_base(intrin));
1639
1640 unsigned load_size = intrin->def.num_components *
1641 intrin->def.bit_size / 8;
1642 unsigned load_align = intrin->def.bit_size / 8;
1643
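/* Clamp the offset so the load cannot read past the end of the shader's
 * constant data.
 */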
1644 assert(load_size < b->shader->constant_data_size);
1645 unsigned max_offset = b->shader->constant_data_size - load_size;
1646 offset = nir_umin(b, offset, nir_imm_int(b, max_offset));
1647
1648 nir_def *const_data_addr = nir_pack_64_2x32_split(b,
1649 nir_iadd(b,
1650 nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
1651 offset),
1652 nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
1653
1654 nir_def *data =
1655 nir_load_global_constant(b, const_data_addr,
1656 load_align,
1657 intrin->def.num_components,
1658 intrin->def.bit_size);
1659
1660 nir_def_rewrite_uses(&intrin->def, data);
1661
1662 return true;
1663 }
1664
1665 static bool
1666 lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
1667 struct apply_pipeline_layout_state *state)
1668 {
1669 b->cursor = nir_instr_remove(&intrin->instr);
1670
1671 nir_def *base_workgroup_id =
1672 nir_load_push_constant(b, 3, 32, nir_imm_int(b, 0),
1673 .base = offsetof(struct anv_push_constants, cs.base_work_group_id),
1674 .range = sizeof_field(struct anv_push_constants, cs.base_work_group_id));
1675 nir_def_rewrite_uses(&intrin->def, base_workgroup_id);
1676
1677 return true;
1678 }
1679
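/* Rewrite a texture/sampler deref source into either a binding table
 * offset or a bindless handle, depending on how the binding was assigned.
 */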
1680 static void
1681 lower_tex_deref(nir_builder *b, nir_tex_instr *tex,
1682 nir_tex_src_type deref_src_type,
1683 unsigned base_index, unsigned plane,
1684 struct apply_pipeline_layout_state *state)
1685 {
1686 int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
1687 if (deref_src_idx < 0)
1688 return;
1689
1690 nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
1691 nir_variable *var = nir_deref_instr_get_variable(deref);
1692
1693 const bool is_sampler = deref_src_type == nir_tex_src_sampler_deref;
1694 const unsigned set = var->data.descriptor_set;
1695 const unsigned binding = var->data.binding;
1696 const bool bindless = is_binding_bindless(set, binding, is_sampler, state);
1697
1698 nir_def *array_index = NULL;
1699 if (deref->deref_type != nir_deref_type_var) {
1700 assert(deref->deref_type == nir_deref_type_array);
1701
1702 array_index = deref->arr.index.ssa;
1703 } else {
1704 array_index = nir_imm_int(b, 0);
1705 }
1706
1707 nir_tex_src_type offset_src_type;
1708 nir_def *index;
1709 if (deref_src_type == nir_tex_src_texture_deref) {
1710 index = build_surface_index_for_binding(b, set, binding, array_index,
1711 plane,
1712 tex->texture_non_uniform,
1713 state);
1714 offset_src_type = bindless ?
1715 nir_tex_src_texture_handle :
1716 nir_tex_src_texture_offset;
1717 } else {
1718 assert(deref_src_type == nir_tex_src_sampler_deref);
1719
1720 index = build_sampler_handle_for_binding(b, set, binding, array_index,
1721 plane,
1722 tex->sampler_non_uniform,
1723 state);
1724 offset_src_type = bindless ?
1725 nir_tex_src_sampler_handle :
1726 nir_tex_src_sampler_offset;
1727 }
1728
1729 nir_src_rewrite(&tex->src[deref_src_idx].src, index);
1730 tex->src[deref_src_idx].src_type = offset_src_type;
1731 }
1732
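/* Return the constant plane index from the nir_tex_src_plane source and
 * remove that source from the instruction, or return 0 if there is none.
 */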
1733 static uint32_t
1734 tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
1735 {
1736 int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
1737 if (plane_src_idx < 0)
1738 return 0;
1739
1740 unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src);
1741
1742 nir_tex_instr_remove_src(tex, plane_src_idx);
1743
1744 return plane;
1745 }
1746
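/* Recursively build a balanced tree of bcsel instructions selecting
 * srcs[idx] for idx in [start, end). For example, with 4 sources this
 * produces:
 *
 *    bcsel(idx < 2, bcsel(idx < 1, srcs[0], srcs[1]),
 *                   bcsel(idx < 3, srcs[2], srcs[3]))
 */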
1747 static nir_def *
1748 build_def_array_select(nir_builder *b, nir_def **srcs, nir_def *idx,
1749 unsigned start, unsigned end)
1750 {
1751 if (start == end - 1) {
1752 return srcs[start];
1753 } else {
1754 unsigned mid = start + (end - start) / 2;
1755 return nir_bcsel(b, nir_ilt_imm(b, idx, mid),
1756 build_def_array_select(b, srcs, idx, start, mid),
1757 build_def_array_select(b, srcs, idx, mid, end));
1758 }
1759 }
1760
1761 static bool
1762 lower_tex(nir_builder *b, nir_tex_instr *tex,
1763 struct apply_pipeline_layout_state *state)
1764 {
1765 unsigned plane = tex_instr_get_and_remove_plane_src(tex);
1766
1767 b->cursor = nir_before_instr(&tex->instr);
1768
1769 lower_tex_deref(b, tex, nir_tex_src_texture_deref,
1770 tex->texture_index, plane, state);
1771 lower_tex_deref(b, tex, nir_tex_src_sampler_deref,
1772 tex->sampler_index, plane, state);
1773
1774 /* The whole lot will be embedded in the offset/handle source */
1775 tex->texture_index = 0;
1776 tex->sampler_index = 0;
1777
1778 return true;
1779 }
1780
1781 static bool
1782 lower_ray_query_globals(nir_builder *b, nir_intrinsic_instr *intrin,
1783 struct apply_pipeline_layout_state *state)
1784 {
1785 b->cursor = nir_instr_remove(&intrin->instr);
1786
1787 nir_def *rq_globals =
1788 nir_load_push_constant(b, 1, 64, nir_imm_int(b, 0),
1789 .base = offsetof(struct anv_push_constants, ray_query_globals),
1790 .range = sizeof_field(struct anv_push_constants, ray_query_globals));
1791 nir_def_rewrite_uses(&intrin->def, rq_globals);
1792
1793 return true;
1794 }
1795
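/* Main lowering callback: dispatch each resource-related intrinsic and
 * texture instruction to the matching helper above.
 */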
1796 static bool
1797 apply_pipeline_layout(nir_builder *b, nir_instr *instr, void *_state)
1798 {
1799 struct apply_pipeline_layout_state *state = _state;
1800
1801 switch (instr->type) {
1802 case nir_instr_type_intrinsic: {
1803 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1804 switch (intrin->intrinsic) {
1805 case nir_intrinsic_vulkan_resource_index:
1806 return lower_res_index_intrinsic(b, intrin, state);
1807 case nir_intrinsic_vulkan_resource_reindex:
1808 return lower_res_reindex_intrinsic(b, intrin, state);
1809 case nir_intrinsic_load_vulkan_descriptor:
1810 return lower_load_vulkan_descriptor(b, intrin, state);
1811 case nir_intrinsic_get_ssbo_size:
1812 return lower_get_ssbo_size(b, intrin, state);
1813 case nir_intrinsic_image_deref_load:
1814 case nir_intrinsic_image_deref_store:
1815 case nir_intrinsic_image_deref_atomic:
1816 case nir_intrinsic_image_deref_atomic_swap:
1817 case nir_intrinsic_image_deref_samples:
1818 case nir_intrinsic_image_deref_load_param_intel:
1819 case nir_intrinsic_image_deref_load_raw_intel:
1820 case nir_intrinsic_image_deref_store_raw_intel:
1821 case nir_intrinsic_image_deref_sparse_load:
1822 return lower_image_intrinsic(b, intrin, state);
1823 case nir_intrinsic_image_deref_size:
1824 return lower_image_size_intrinsic(b, intrin, state);
1825 case nir_intrinsic_load_constant:
1826 return lower_load_constant(b, intrin, state);
1827 case nir_intrinsic_load_base_workgroup_id:
1828 return lower_base_workgroup_id(b, intrin, state);
1829 case nir_intrinsic_load_ray_query_global_intel:
1830 return lower_ray_query_globals(b, intrin, state);
1831 default:
1832 return false;
1833 }
1834 break;
1835 }
1836 case nir_instr_type_tex:
1837 return lower_tex(b, nir_instr_as_tex(instr), state);
1838 default:
1839 return false;
1840 }
1841 }
1842
1843 struct binding_info {
1844 uint32_t binding;
1845 uint8_t set;
1846 uint16_t score;
1847 };
1848
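/* qsort comparator: highest score first, then by set and binding so the
 * ordering is deterministic.
 */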
1849 static int
1850 compare_binding_infos(const void *_a, const void *_b)
1851 {
1852 const struct binding_info *a = _a, *b = _b;
1853 if (a->score != b->score)
1854 return b->score - a->score;
1855
1856 if (a->set != b->set)
1857 return a->set - b->set;
1858
1859 return a->binding - b->binding;
1860 }
1861
1862 #ifndef NDEBUG
1863 static void
1864 anv_validate_pipeline_layout(const struct anv_pipeline_sets_layout *layout,
1865 nir_shader *shader)
1866 {
1867 nir_foreach_function_impl(impl, shader) {
1868 nir_foreach_block(block, impl) {
1869 nir_foreach_instr(instr, block) {
1870 if (instr->type != nir_instr_type_intrinsic)
1871 continue;
1872
1873 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1874 if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
1875 continue;
1876
1877 unsigned set = nir_intrinsic_desc_set(intrin);
1878 assert(layout->set[set].layout);
1879 }
1880 }
1881 }
1882 }
1883 #endif
1884
1885 static bool
1886 binding_is_promotable_to_push(const struct anv_descriptor_set_binding_layout *bind_layout)
1887 {
1888 return (bind_layout->flags & non_pushable_binding_flags) == 0;
1889 }
1890
1891 static void
1892 add_null_bti_entry(struct anv_pipeline_bind_map *map)
1893 {
1894 map->surface_to_descriptor[map->surface_count++] =
1895 (struct anv_pipeline_binding) {
1896 .set = ANV_DESCRIPTOR_SET_NULL,
1897 };
1898 assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
1899 }
1900
1901 static void
1902 add_bti_entry(struct anv_pipeline_bind_map *map,
1903 uint32_t set,
1904 uint32_t binding,
1905 uint32_t element,
1906 uint32_t plane,
1907 const struct anv_descriptor_set_binding_layout *bind_layout)
1908 {
1909 map->surface_to_descriptor[map->surface_count++] =
1910 (struct anv_pipeline_binding) {
1911 .set = set,
1912 .binding = binding,
1913 .index = bind_layout->descriptor_index + element,
1914 .set_offset = bind_layout->descriptor_surface_offset +
1915 element * bind_layout->descriptor_surface_stride +
1916 plane * bind_layout->descriptor_data_surface_size,
1917 .plane = plane,
1918 };
1919 assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
1920 }
1921
1922 static void
1923 add_dynamic_bti_entry(struct anv_pipeline_bind_map *map,
1924 uint32_t set,
1925 uint32_t binding,
1926 uint32_t element,
1927 const struct anv_pipeline_sets_layout *layout,
1928 const struct anv_descriptor_set_binding_layout *bind_layout)
1929 {
1930 map->surface_to_descriptor[map->surface_count++] =
1931 (struct anv_pipeline_binding) {
1932 .set = set,
1933 .binding = binding,
1934 .index = bind_layout->descriptor_index + element,
1935 .set_offset = bind_layout->descriptor_surface_offset +
1936 element * bind_layout->descriptor_surface_stride,
1937 .dynamic_offset_index = bind_layout->dynamic_offset_index + element,
1938 };
1939 assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
1940 }
1941
1942 static void
1943 add_sampler_entry(struct anv_pipeline_bind_map *map,
1944 uint32_t set,
1945 uint32_t binding,
1946 uint32_t element,
1947 uint32_t plane,
1948 const struct anv_pipeline_sets_layout *layout,
1949 const struct anv_descriptor_set_binding_layout *bind_layout)
1950 {
1951 assert((bind_layout->descriptor_index + element) < layout->set[set].layout->descriptor_count);
1952 map->sampler_to_descriptor[map->sampler_count++] =
1953 (struct anv_pipeline_binding) {
1954 .set = set,
1955 .binding = binding,
1956 .index = bind_layout->descriptor_index + element,
1957 .plane = plane,
1958 };
1959 }
1960
1961 static void
1962 add_push_entry(struct anv_pipeline_push_map *push_map,
1963 uint32_t set,
1964 uint32_t binding,
1965 uint32_t element,
1966 const struct anv_pipeline_sets_layout *layout,
1967 const struct anv_descriptor_set_binding_layout *bind_layout)
1968 {
1969 push_map->block_to_descriptor[push_map->block_count++] =
1970 (struct anv_pipeline_binding) {
1971 .set = set,
1972 .binding = binding,
1973 .index = bind_layout->descriptor_index + element,
1974 .dynamic_offset_index = bind_layout->dynamic_offset_index + element,
1975 };
1976 }
1977
1978 static bool
1979 binding_should_use_surface_binding_table(const struct apply_pipeline_layout_state *state,
1980 const struct anv_descriptor_set_binding_layout *binding)
1981 {
1982 if ((binding->data & ANV_DESCRIPTOR_BTI_SURFACE_STATE) == 0)
1983 return false;
1984
1985 if (state->pdevice->always_use_bindless &&
1986 (binding->data & ANV_DESCRIPTOR_SURFACE))
1987 return false;
1988
1989 return true;
1990 }
1991
1992 static bool
1993 binding_should_use_sampler_binding_table(const struct apply_pipeline_layout_state *state,
1994 const struct anv_descriptor_set_binding_layout *binding)
1995 {
1996 if ((binding->data & ANV_DESCRIPTOR_BTI_SAMPLER_STATE) == 0)
1997 return false;
1998
1999 if (state->pdevice->always_use_bindless &&
2000 (binding->data & ANV_DESCRIPTOR_SAMPLER))
2001 return false;
2002
2003 return true;
2004 }
2005
2006 void
2007 anv_nir_apply_pipeline_layout(nir_shader *shader,
2008 const struct anv_physical_device *pdevice,
2009 enum brw_robustness_flags robust_flags,
2010 bool independent_sets,
2011 const struct anv_pipeline_sets_layout *layout,
2012 struct anv_pipeline_bind_map *map,
2013 struct anv_pipeline_push_map *push_map,
2014 void *push_map_mem_ctx)
2015 {
2016 void *mem_ctx = ralloc_context(NULL);
2017
2018 #ifndef NDEBUG
2019 /* We should not have any reference to a descriptor set that is not
2020 * given through the pipeline layout (layout->set[set].layout == NULL).
2021 */
2022 anv_validate_pipeline_layout(layout, shader);
2023 #endif
2024
2025 const bool bindless_stage =
2026 brw_shader_stage_requires_bindless_resources(shader->info.stage);
2027 struct apply_pipeline_layout_state state = {
2028 .pdevice = pdevice,
2029 .layout = layout,
2030 .desc_addr_format = bindless_stage ?
2031 nir_address_format_64bit_global_32bit_offset :
2032 nir_address_format_32bit_index_offset,
2033 .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_flags),
2034 .ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_flags),
2035 .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
2036 .has_independent_sets = independent_sets,
2037 };
2038
2039 /* Compute the number of push blocks required. */
2040 unsigned push_block_count = 0;
2041 for (unsigned s = 0; s < layout->num_sets; s++) {
2042 if (!layout->set[s].layout)
2043 continue;
2044
2045 const unsigned count = layout->set[s].layout->binding_count;
2046 state.set[s].binding = rzalloc_array_size(mem_ctx, sizeof(state.set[s].binding[0]), count);
2047
2048 const struct anv_descriptor_set_layout *set_layout = layout->set[s].layout;
2049 for (unsigned b = 0; b < set_layout->binding_count; b++) {
2050 if (set_layout->binding[b].type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
2051 push_block_count += set_layout->binding[b].array_size;
2052 }
2053 }
2054
2055 /* Find all used sets/bindings */
2056 nir_shader_instructions_pass(shader, get_used_bindings,
2057 nir_metadata_all, &state);
2058
2059 /* Assign a BTI to each used descriptor set */
2060 for (unsigned s = 0; s < layout->num_sets; s++) {
2061 if (state.desc_addr_format != nir_address_format_32bit_index_offset) {
2062 state.set[s].desc_offset = BINDLESS_OFFSET;
2063 } else if (state.set[s].desc_buffer_used) {
2064 map->surface_to_descriptor[map->surface_count] =
2065 (struct anv_pipeline_binding) {
2066 .set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
2067 .binding = UINT32_MAX,
2068 .index = s,
2069 };
2070 state.set[s].desc_offset = map->surface_count++;
2071 }
2072 }
2073
2074 /* Assign a block index for each surface */
2075 push_map->block_to_descriptor =
2076 rzalloc_array(push_map_mem_ctx, struct anv_pipeline_binding,
2077 map->surface_count + push_block_count);
2078
2079 memcpy(push_map->block_to_descriptor,
2080 map->surface_to_descriptor,
2081 sizeof(push_map->block_to_descriptor[0]) * map->surface_count);
2082 push_map->block_count = map->surface_count;
2083
2084 /* Count used bindings and add push blocks for promotion to push
2085 * constants
2086 */
2087 unsigned used_binding_count = 0;
2088 for (uint32_t set = 0; set < layout->num_sets; set++) {
2089 struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
2090 if (!set_layout)
2091 continue;
2092
2093 for (unsigned b = 0; b < set_layout->binding_count; b++) {
2094 if (state.set[set].binding[b].use_count == 0)
2095 continue;
2096
2097 used_binding_count++;
2098
2099 const struct anv_descriptor_set_binding_layout *bind_layout =
2100 &set_layout->binding[b];
2101 if (!binding_is_promotable_to_push(bind_layout))
2102 continue;
2103
2104 if (bind_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
2105 state.set[set].binding[b].push_block = push_map->block_count;
2106 for (unsigned i = 0; i < bind_layout->array_size; i++)
2107 add_push_entry(push_map, set, b, i, layout, bind_layout);
2108 } else {
2109 state.set[set].binding[b].push_block = state.set[set].desc_offset;
2110 }
2111 }
2112 }
2113
2114 struct binding_info *infos =
2115 rzalloc_array(mem_ctx, struct binding_info, used_binding_count);
2116 used_binding_count = 0;
2117 for (uint32_t set = 0; set < layout->num_sets; set++) {
2118 const struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
2119 if (!set_layout)
2120 continue;
2121
2122 for (unsigned b = 0; b < set_layout->binding_count; b++) {
2123 if (state.set[set].binding[b].use_count == 0)
2124 continue;
2125
2126 const struct anv_descriptor_set_binding_layout *binding =
2127 &layout->set[set].layout->binding[b];
2128
2129 /* Do a fixed-point calculation to generate a score based on the
2130 * number of uses and the binding array size. We shift by 7 instead
2131 * of 8 because we're going to use the top bit below to give
2132 * everything which does not support bindless a much higher priority
2133 * than things which do.
2134 */
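/* For example, a binding with use_count 4 and array_size 2 scores
 * (4 << 7) / 2 = 256; if it also cannot be bindless, the top bit raises
 * that to 256 | (1 << 15) = 33024.
 */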
2135 uint16_t score = ((uint16_t)state.set[set].binding[b].use_count << 7) /
2136 binding->array_size;
2137
2138 /* If the descriptor type doesn't support bindless then put it at the
2139 * beginning so we guarantee it gets a slot.
2140 */
2141 if (!anv_descriptor_supports_bindless(pdevice, binding, true) ||
2142 !anv_descriptor_supports_bindless(pdevice, binding, false))
2143 score |= 1 << 15;
2144
2145 infos[used_binding_count++] = (struct binding_info) {
2146 .set = set,
2147 .binding = b,
2148 .score = score,
2149 };
2150 }
2151 }
2152
2153 /* Order the binding infos based on score with highest scores first. If
2154 * scores are equal we then order by set and binding.
2155 */
2156 qsort(infos, used_binding_count, sizeof(struct binding_info),
2157 compare_binding_infos);
2158
2159 for (unsigned i = 0; i < used_binding_count; i++) {
2160 unsigned set = infos[i].set, b = infos[i].binding;
2161 assert(layout->set[set].layout);
2162 const struct anv_descriptor_set_binding_layout *binding =
2163 &layout->set[set].layout->binding[b];
2164
2165 const uint32_t array_size = binding->array_size;
2166
2167 if (binding->dynamic_offset_index >= 0)
2168 state.has_dynamic_buffers = true;
2169
2170 const unsigned array_multiplier = bti_multiplier(&state, set, b);
2171 assert(array_multiplier >= 1);
2172
2173 /* Assume bindless by default */
2174 state.set[set].binding[b].surface_offset = BINDLESS_OFFSET;
2175 state.set[set].binding[b].sampler_offset = BINDLESS_OFFSET;
2176
2177 if (binding_should_use_surface_binding_table(&state, binding)) {
2178 if (map->surface_count + array_size * array_multiplier > MAX_BINDING_TABLE_SIZE ||
2179 anv_descriptor_requires_bindless(pdevice, binding, false) ||
2180 brw_shader_stage_requires_bindless_resources(shader->info.stage)) {
2181 /* If this descriptor doesn't fit in the binding table or if it
2182 * requires bindless for some reason, flag it as bindless.
2183 */
2184 assert(anv_descriptor_supports_bindless(pdevice, binding, false));
2185 } else {
2186 state.set[set].binding[b].surface_offset = map->surface_count;
2187 if (binding->dynamic_offset_index < 0) {
2188 struct anv_sampler **samplers = binding->immutable_samplers;
2189 uint8_t max_planes = bti_multiplier(&state, set, b);
2190 for (unsigned i = 0; i < binding->array_size; i++) {
2191 uint8_t planes = samplers ? samplers[i]->n_planes : 1;
2192 for (uint8_t p = 0; p < max_planes; p++) {
2193 if (p < planes) {
2194 add_bti_entry(map, set, b, i, p, binding);
2195 } else {
2196 add_null_bti_entry(map);
2197 }
2198 }
2199 }
2200 } else {
2201 for (unsigned i = 0; i < binding->array_size; i++)
2202 add_dynamic_bti_entry(map, set, b, i, layout, binding);
2203 }
2204 }
2205 assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
2206 }
2207
2208 if (binding_should_use_sampler_binding_table(&state, binding)) {
2209 if (map->sampler_count + array_size * array_multiplier > MAX_SAMPLER_TABLE_SIZE ||
2210 anv_descriptor_requires_bindless(pdevice, binding, true) ||
2211 brw_shader_stage_requires_bindless_resources(shader->info.stage)) {
2212 /* If this descriptor doesn't fit in the binding table or if it
2213 * requires bindless for some reason, flag it as bindless.
2214 *
2215 * We also make large sampler arrays bindless because we can avoid
2216 * using indirect sends thanks to bindless samplers being packed
2217 * less tightly than the sampler table.
2218 */
2219 assert(anv_descriptor_supports_bindless(pdevice, binding, true));
2220 } else {
2221 state.set[set].binding[b].sampler_offset = map->sampler_count;
2222 uint8_t max_planes = bti_multiplier(&state, set, b);
2223 for (unsigned i = 0; i < binding->array_size; i++) {
2224 for (uint8_t p = 0; p < max_planes; p++) {
2225 add_sampler_entry(map, set, b, i, p, layout, binding);
2226 }
2227 }
2228 }
2229 }
2230
2231 if (binding->data & ANV_DESCRIPTOR_INLINE_UNIFORM) {
2232 state.set[set].binding[b].surface_offset = state.set[set].desc_offset;
2233 }
2234
2235 #if 0
2236 fprintf(stderr, "set=%u binding=%u surface_offset=0x%08x require_bindless=%u type=%s\n",
2237 set, b,
2238 state.set[set].binding[b].surface_offset,
2239 anv_descriptor_requires_bindless(pdevice, binding, false),
2240 vk_DescriptorType_to_str(binding->type));
2241 #endif
2242 }
2243
2244 /* Before we do the normal lowering, we look for any SSBO operations
2245 * that we can lower to the BTI model and lower them up-front. The BTI
2246 * model can perform better than the A64 model for a couple reasons:
2247 *
2248 * 1. 48-bit address calculations are potentially expensive and using
2249 * the BTI model lets us simply compute 32-bit offsets and the
2250 * hardware adds the 64-bit surface base address.
2251 *
2252 * 2. The BTI messages, because they use surface states, do bounds
2253 * checking for us. With the A64 model, we have to do our own
2254 * bounds checking and this means wider pointers and extra
2255 * calculations and branching in the shader.
2256 *
2257 * The solution to both of these is to convert things to the BTI model
2258 * opportunistically. We need to do this as a pre-pass for two
2259 * reasons:
2260 *
2261 * 1. The BTI model requires nir_address_format_32bit_index_offset
2262 * pointers which are not the same type as the pointers needed for
2263 * the A64 model. Because all our derefs are set up for the A64
2264 * model (in case we have variable pointers), we have to crawl all
2265 * the way back to the vulkan_resource_index intrinsic and build a
2266 * completely fresh index+offset calculation.
2267 *
2268 * 2. Because the variable-pointers-capable lowering that we do as part
2269 * of apply_pipeline_layout is destructive (it really has to
2270 * be to handle variable pointers properly), we've lost the deref
2271 * information by the time we get to the load/store/atomic
2272 * intrinsics in that pass.
2273 */
2274 nir_shader_instructions_pass(shader, lower_direct_buffer_instr,
2275 nir_metadata_block_index |
2276 nir_metadata_dominance,
2277 &state);
2278
2279 /* We just lowered all the direct access we could. Delete the now-dead
2280 * code so it's not in the way when we do our indirect lowering.
2281 */
2282 nir_opt_dce(shader);
2283
2284 nir_shader_instructions_pass(shader, apply_pipeline_layout,
2285 nir_metadata_block_index |
2286 nir_metadata_dominance,
2287 &state);
2288
2289 ralloc_free(mem_ctx);
2290
2291 if (brw_shader_stage_is_bindless(shader->info.stage)) {
2292 assert(map->surface_count == 0);
2293 assert(map->sampler_count == 0);
2294 }
2295
2296 #if 0
2297 fprintf(stderr, "bti:\n");
2298 for (unsigned i = 0; i < map->surface_count; i++) {
2299 fprintf(stderr, " %03i: set=%03u binding=%06i index=%u plane=%u set_offset=0x%08x dyn_offset=0x%08x\n", i,
2300 map->surface_to_descriptor[i].set,
2301 map->surface_to_descriptor[i].binding,
2302 map->surface_to_descriptor[i].index,
2303 map->surface_to_descriptor[i].plane,
2304 map->surface_to_descriptor[i].set_offset,
2305 map->surface_to_descriptor[i].dynamic_offset_index);
2306 }
2307 fprintf(stderr, "sti:\n");
2308 for (unsigned i = 0; i < map->sampler_count; i++) {
2309 fprintf(stderr, " %03i: set=%03u binding=%06i index=%u plane=%u\n", i,
2310 map->sampler_to_descriptor[i].set,
2311 map->sampler_to_descriptor[i].binding,
2312 map->sampler_to_descriptor[i].index,
2313 map->sampler_to_descriptor[i].plane);
2314 }
2315 #endif
2316
2317 /* Now that we're done computing the surface and sampler portions of the
2318 * bind map, hash them. This lets us quickly determine if the actual
2319 * mapping has changed and not just a no-op pipeline change.
2320 */
2321 _mesa_sha1_compute(map->surface_to_descriptor,
2322 map->surface_count * sizeof(struct anv_pipeline_binding),
2323 map->surface_sha1);
2324 _mesa_sha1_compute(map->sampler_to_descriptor,
2325 map->sampler_count * sizeof(struct anv_pipeline_binding),
2326 map->sampler_sha1);
2327 }
2328