1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "anv_nir.h"
25 #include "program/prog_parameter.h"
26 #include "nir/nir_builder.h"
27 #include "compiler/brw_nir.h"
28 #include "util/mesa-sha1.h"
29 #include "util/set.h"
30
31 /* Sampler tables don't actually have a maximum size but we pick one just so
32 * that we don't end up emitting too much state on-the-fly.
33 */
34 #define MAX_SAMPLER_TABLE_SIZE 128
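/* Sentinel surface/sampler offset used to mark a binding as bindless, i.e.
 * one that is accessed through the descriptor buffer rather than through a
 * binding table slot.
 */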
35 #define BINDLESS_OFFSET 255
36
37 #define sizeof_field(type, field) sizeof(((type *)0)->field)
38
39 struct apply_pipeline_layout_state {
40 const struct anv_physical_device *pdevice;
41
42 const struct anv_pipeline_layout *layout;
43 bool add_bounds_checks;
44 nir_address_format desc_addr_format;
45 nir_address_format ssbo_addr_format;
46 nir_address_format ubo_addr_format;
47
48 /* Place to flag lowered instructions so we don't lower them twice */
49 struct set *lowered_instrs;
50
51 bool uses_constants;
52 bool has_dynamic_buffers;
53 uint8_t constants_offset;
54 struct {
55 bool desc_buffer_used;
56 uint8_t desc_offset;
57
58 uint8_t *use_count;
59 uint8_t *surface_offsets;
60 uint8_t *sampler_offsets;
61 } set[MAX_SETS];
62 };
63
64 static nir_address_format
addr_format_for_desc_type(VkDescriptorType desc_type,
66 struct apply_pipeline_layout_state *state)
67 {
68 switch (desc_type) {
69 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
70 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
71 return state->ssbo_addr_format;
72
73 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
74 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
75 return state->ubo_addr_format;
76
77 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
78 return state->desc_addr_format;
79
80 default:
81 unreachable("Unsupported descriptor type");
82 }
83 }
84
85 static void
add_binding(struct apply_pipeline_layout_state *state,
87 uint32_t set, uint32_t binding)
88 {
89 const struct anv_descriptor_set_binding_layout *bind_layout =
90 &state->layout->set[set].layout->binding[binding];
91
92 if (state->set[set].use_count[binding] < UINT8_MAX)
93 state->set[set].use_count[binding]++;
94
95 /* Only flag the descriptor buffer as used if there's actually data for
96 * this binding. This lets us be lazy and call this function constantly
97 * without worrying about unnecessarily enabling the buffer.
98 */
99 if (bind_layout->descriptor_stride)
100 state->set[set].desc_buffer_used = true;
101 }
102
103 static void
add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src)
105 {
106 nir_deref_instr *deref = nir_src_as_deref(src);
107 nir_variable *var = nir_deref_instr_get_variable(deref);
108 add_binding(state, var->data.descriptor_set, var->data.binding);
109 }
110
111 static void
add_tex_src_binding(struct apply_pipeline_layout_state *state,
113 nir_tex_instr *tex, nir_tex_src_type deref_src_type)
114 {
115 int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
116 if (deref_src_idx < 0)
117 return;
118
119 add_deref_src_binding(state, tex->src[deref_src_idx].src);
120 }
121
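/* nir_shader_instructions_pass() callback that records which bindings the
 * shader actually uses: it bumps the per-binding use count and flags any
 * descriptor buffers (and shader constants) that will be needed.
 */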
122 static bool
get_used_bindings(UNUSED nir_builder *_b, nir_instr *instr, void *_state)
124 {
125 struct apply_pipeline_layout_state *state = _state;
126
127 switch (instr->type) {
128 case nir_instr_type_intrinsic: {
129 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
130 switch (intrin->intrinsic) {
131 case nir_intrinsic_vulkan_resource_index:
132 add_binding(state, nir_intrinsic_desc_set(intrin),
133 nir_intrinsic_binding(intrin));
134 break;
135
136 case nir_intrinsic_image_deref_load:
137 case nir_intrinsic_image_deref_store:
138 case nir_intrinsic_image_deref_atomic_add:
139 case nir_intrinsic_image_deref_atomic_imin:
140 case nir_intrinsic_image_deref_atomic_umin:
141 case nir_intrinsic_image_deref_atomic_imax:
142 case nir_intrinsic_image_deref_atomic_umax:
143 case nir_intrinsic_image_deref_atomic_and:
144 case nir_intrinsic_image_deref_atomic_or:
145 case nir_intrinsic_image_deref_atomic_xor:
146 case nir_intrinsic_image_deref_atomic_exchange:
147 case nir_intrinsic_image_deref_atomic_comp_swap:
148 case nir_intrinsic_image_deref_atomic_fadd:
149 case nir_intrinsic_image_deref_size:
150 case nir_intrinsic_image_deref_samples:
151 case nir_intrinsic_image_deref_load_param_intel:
152 case nir_intrinsic_image_deref_load_raw_intel:
153 case nir_intrinsic_image_deref_store_raw_intel:
154 add_deref_src_binding(state, intrin->src[0]);
155 break;
156
157 case nir_intrinsic_load_constant:
158 state->uses_constants = true;
159 break;
160
161 default:
162 break;
163 }
164 break;
165 }
166 case nir_instr_type_tex: {
167 nir_tex_instr *tex = nir_instr_as_tex(instr);
168 add_tex_src_binding(state, tex, nir_tex_src_texture_deref);
169 add_tex_src_binding(state, tex, nir_tex_src_sampler_deref);
170 break;
171 }
172 default:
173 break;
174 }
175
176 return false;
177 }
178
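/* Walk a chain of vulkan_resource_reindex intrinsics back to the
 * vulkan_resource_index that roots it, or return NULL if the chain does not
 * terminate in one.
 */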
179 static nir_intrinsic_instr *
find_descriptor_for_index_src(nir_src src,
181 struct apply_pipeline_layout_state *state)
182 {
183 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);
184
185 while (intrin && intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex)
186 intrin = nir_src_as_intrinsic(intrin->src[0]);
187
188 if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
189 return NULL;
190
191 return intrin;
192 }
193
194 static bool
descriptor_has_bti(nir_intrinsic_instr *intrin,
196 struct apply_pipeline_layout_state *state)
197 {
198 assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
199
200 uint32_t set = nir_intrinsic_desc_set(intrin);
201 uint32_t binding = nir_intrinsic_binding(intrin);
202 const struct anv_descriptor_set_binding_layout *bind_layout =
203 &state->layout->set[set].layout->binding[binding];
204
205 uint32_t surface_index;
206 if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM)
207 surface_index = state->set[set].desc_offset;
208 else
209 surface_index = state->set[set].surface_offsets[binding];
210
211 /* Only lower to a BTI message if we have a valid binding table index. */
212 return surface_index < MAX_BINDING_TABLE_SIZE;
213 }
214
215 static nir_address_format
descriptor_address_format(nir_intrinsic_instr *intrin,
217 struct apply_pipeline_layout_state *state)
218 {
219 assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
220
221 return addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);
222 }
223
224 static nir_intrinsic_instr *
nir_deref_find_descriptor(nir_deref_instr *deref,
226 struct apply_pipeline_layout_state *state)
227 {
228 while (1) {
229 /* Nothing we will use this on has a variable */
230 assert(deref->deref_type != nir_deref_type_var);
231
232 nir_deref_instr *parent = nir_src_as_deref(deref->parent);
233 if (!parent)
234 break;
235
236 deref = parent;
237 }
238 assert(deref->deref_type == nir_deref_type_cast);
239
240 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent);
241 if (!intrin || intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
242 return false;
243
244 return find_descriptor_for_index_src(intrin->src[0], state);
245 }
246
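/* Load num_components x bit_size of data from descriptor memory at
 * desc_addr + desc_offset. Depending on the descriptor address format this
 * is either an A64 constant load or a UBO load through the descriptor
 * buffer's binding table entry.
 */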
247 static nir_ssa_def *
build_load_descriptor_mem(nir_builder *b,
249 nir_ssa_def *desc_addr, unsigned desc_offset,
250 unsigned num_components, unsigned bit_size,
251 struct apply_pipeline_layout_state *state)
253 {
254 switch (state->desc_addr_format) {
255 case nir_address_format_64bit_global_32bit_offset: {
256 nir_ssa_def *base_addr =
257 nir_pack_64_2x32(b, nir_channels(b, desc_addr, 0x3));
258 nir_ssa_def *offset32 =
259 nir_iadd_imm(b, nir_channel(b, desc_addr, 3), desc_offset);
260
261 return nir_load_global_constant_offset(b, num_components, bit_size,
262 base_addr, offset32,
263 .align_mul = 8,
264 .align_offset = desc_offset % 8);
265 }
266
267 case nir_address_format_32bit_index_offset: {
268 nir_ssa_def *surface_index = nir_channel(b, desc_addr, 0);
269 nir_ssa_def *offset32 =
270 nir_iadd_imm(b, nir_channel(b, desc_addr, 1), desc_offset);
271
272 return nir_load_ubo(b, num_components, bit_size,
273 surface_index, offset32,
274 .align_mul = 8,
275 .align_offset = desc_offset % 8,
276 .range_base = 0,
277 .range = ~0);
278 }
279
280 default:
281 unreachable("Unsupported address format");
282 }
283 }
284
285 /** Build a Vulkan resource index
286 *
287 * A "resource index" is the term used by our SPIR-V parser and the relevant
288 * NIR intrinsics for a reference into a descriptor set. It acts much like a
289 * deref in NIR except that it accesses opaque descriptors instead of memory.
290 *
291 * Coming out of SPIR-V, both the resource indices (in the form of
292 * vulkan_resource_[re]index intrinsics) and the memory derefs (in the form
293 * of nir_deref_instr) use the same vector component/bit size. The meaning
294 * of those values for memory derefs (nir_deref_instr) is given by the
295 * nir_address_format associated with the descriptor type. For resource
* indices, it's an encoding entirely internal to ANV which describes, in some
297 * sense, the address of the descriptor. Thanks to the NIR/SPIR-V rules, it
298 * must be packed into the same size SSA values as a memory address. For this
299 * reason, the actual encoding may depend both on the address format for
300 * memory derefs and the descriptor address format.
301 *
302 * The load_vulkan_descriptor intrinsic exists to provide a transition point
303 * between these two forms of derefs: descriptor and memory.
304 */
305 static nir_ssa_def *
build_res_index(nir_builder *b, uint32_t set, uint32_t binding,
307 nir_ssa_def *array_index, nir_address_format addr_format,
308 struct apply_pipeline_layout_state *state)
309 {
310 const struct anv_descriptor_set_binding_layout *bind_layout =
311 &state->layout->set[set].layout->binding[binding];
312
313 uint32_t array_size = bind_layout->array_size;
314
315 switch (addr_format) {
316 case nir_address_format_64bit_global_32bit_offset:
317 case nir_address_format_64bit_bounded_global: {
318 uint32_t set_idx;
319 switch (state->desc_addr_format) {
320 case nir_address_format_64bit_global_32bit_offset:
321 set_idx = set;
322 break;
323
324 case nir_address_format_32bit_index_offset:
325 assert(state->set[set].desc_offset < MAX_BINDING_TABLE_SIZE);
326 set_idx = state->set[set].desc_offset;
327 break;
328
329 default:
330 unreachable("Unsupported address format");
331 }
332
333 assert(bind_layout->dynamic_offset_index < MAX_DYNAMIC_BUFFERS);
334 uint32_t dynamic_offset_index = 0xff; /* No dynamic offset */
335 if (bind_layout->dynamic_offset_index >= 0) {
336 dynamic_offset_index =
337 state->layout->set[set].dynamic_offset_start +
338 bind_layout->dynamic_offset_index;
339 }
340
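/* Pack the small integer parts of the index into a single dword:
 *
 *   [23:16] descriptor stride
 *   [15:8]  set index (or descriptor buffer surface index)
 *   [7:0]   dynamic offset base (0xff means no dynamic offset)
 *
 * unpack_res_index() below relies on this exact layout.
 */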
const uint32_t packed = (bind_layout->descriptor_stride << 16) | (set_idx << 8) | dynamic_offset_index;
342
343 return nir_vec4(b, nir_imm_int(b, packed),
344 nir_imm_int(b, bind_layout->descriptor_offset),
345 nir_imm_int(b, array_size - 1),
346 array_index);
347 }
348
349 case nir_address_format_32bit_index_offset: {
350 assert(state->desc_addr_format == nir_address_format_32bit_index_offset);
351 if (bind_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
352 uint32_t surface_index = state->set[set].desc_offset;
353 return nir_imm_ivec2(b, surface_index,
354 bind_layout->descriptor_offset);
355 } else {
356 uint32_t surface_index = state->set[set].surface_offsets[binding];
357 assert(array_size > 0 && array_size <= UINT16_MAX);
358 assert(surface_index <= UINT16_MAX);
359 uint32_t packed = ((array_size - 1) << 16) | surface_index;
360 return nir_vec2(b, array_index, nir_imm_int(b, packed));
361 }
362 }
363
364 default:
365 unreachable("Unsupported address format");
366 }
367 }
368
369 struct res_index_defs {
370 nir_ssa_def *set_idx;
371 nir_ssa_def *dyn_offset_base;
372 nir_ssa_def *desc_offset_base;
373 nir_ssa_def *array_index;
374 nir_ssa_def *desc_stride;
375 };
376
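/* Unpack a resource index built by build_res_index() for the 64-bit address
 * formats. Note that the array index is clamped here: channel 2 holds
 * array_size - 1 and channel 3 the requested index, so the umin() doubles as
 * the bounds check.
 */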
377 static struct res_index_defs
unpack_res_index(nir_builder *b, nir_ssa_def *index)
379 {
380 struct res_index_defs defs;
381
382 nir_ssa_def *packed = nir_channel(b, index, 0);
383 defs.desc_stride = nir_extract_u8(b, packed, nir_imm_int(b, 2));
384 defs.set_idx = nir_extract_u8(b, packed, nir_imm_int(b, 1));
385 defs.dyn_offset_base = nir_extract_u8(b, packed, nir_imm_int(b, 0));
386
387 defs.desc_offset_base = nir_channel(b, index, 1);
388 defs.array_index = nir_umin(b, nir_channel(b, index, 2),
389 nir_channel(b, index, 3));
390
391 return defs;
392 }
393
394 /** Adjust a Vulkan resource index
395 *
396 * This is the equivalent of nir_deref_type_ptr_as_array for resource indices.
397 * For array descriptors, it allows us to adjust the array index. Thanks to
398 * variable pointers, we cannot always fold this re-index operation into the
399 * vulkan_resource_index intrinsic and we have to do it based on nothing but
400 * the address format.
401 */
402 static nir_ssa_def *
build_res_reindex(nir_builder *b, nir_ssa_def *orig, nir_ssa_def *delta,
404 nir_address_format addr_format)
405 {
406 switch (addr_format) {
407 case nir_address_format_64bit_global_32bit_offset:
408 case nir_address_format_64bit_bounded_global:
409 return nir_vec4(b, nir_channel(b, orig, 0),
410 nir_channel(b, orig, 1),
411 nir_channel(b, orig, 2),
412 nir_iadd(b, nir_channel(b, orig, 3), delta));
413
414 case nir_address_format_32bit_index_offset:
415 return nir_vec2(b, nir_iadd(b, nir_channel(b, orig, 0), delta),
416 nir_channel(b, orig, 1));
417
418 default:
419 unreachable("Unhandled address format");
420 }
421 }
422
423 /** Get the address for a descriptor given its resource index
424 *
425 * Because of the re-indexing operations, we can't bounds check descriptor
426 * array access until we have the final index. That means we end up doing the
427 * bounds check here, if needed. See unpack_res_index() for more details.
428 *
429 * This function takes both a bind_layout and a desc_type which are used to
430 * determine the descriptor stride for array descriptors. The bind_layout is
431 * optional for buffer descriptor types.
432 */
433 static nir_ssa_def *
build_desc_addr(nir_builder *b,
435 const struct anv_descriptor_set_binding_layout *bind_layout,
436 const VkDescriptorType desc_type,
437 nir_ssa_def *index, nir_address_format addr_format,
438 struct apply_pipeline_layout_state *state)
439 {
440 switch (addr_format) {
441 case nir_address_format_64bit_global_32bit_offset:
442 case nir_address_format_64bit_bounded_global: {
443 struct res_index_defs res = unpack_res_index(b, index);
444
445 nir_ssa_def *desc_offset = res.desc_offset_base;
446 if (desc_type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
447 /* Compute the actual descriptor offset. For inline uniform blocks,
448 * the array index is ignored as they are only allowed to be a single
449 * descriptor (not an array) and there is no concept of a "stride".
450 *
451 */
452 desc_offset =
453 nir_iadd(b, desc_offset, nir_imul(b, res.array_index, res.desc_stride));
454 }
455
456 switch (state->desc_addr_format) {
457 case nir_address_format_64bit_global_32bit_offset: {
458 nir_ssa_def *base_addr =
459 nir_load_desc_set_address_intel(b, res.set_idx);
460 return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
461 nir_unpack_64_2x32_split_y(b, base_addr),
462 nir_imm_int(b, UINT32_MAX),
463 desc_offset);
464 }
465
466 case nir_address_format_32bit_index_offset:
467 return nir_vec2(b, res.set_idx, desc_offset);
468
469 default:
470 unreachable("Unhandled address format");
471 }
472 }
473
474 case nir_address_format_32bit_index_offset:
475 assert(desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK);
476 assert(state->desc_addr_format == nir_address_format_32bit_index_offset);
477 return index;
478
479 default:
480 unreachable("Unhandled address format");
481 }
482 }
483
484 /** Convert a Vulkan resource index into a buffer address
485 *
486 * In some cases, this does a memory load from the descriptor set and, in
487 * others, it simply converts from one form to another.
488 *
489 * See build_res_index for details about each resource index format.
490 */
491 static nir_ssa_def *
build_buffer_addr_for_res_index(nir_builder *b,
493 const VkDescriptorType desc_type,
494 nir_ssa_def *res_index,
495 nir_address_format addr_format,
496 struct apply_pipeline_layout_state *state)
497 {
498 if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
499 assert(addr_format == state->desc_addr_format);
500 return build_desc_addr(b, NULL, desc_type, res_index, addr_format, state);
501 } else if (addr_format == nir_address_format_32bit_index_offset) {
502 nir_ssa_def *array_index = nir_channel(b, res_index, 0);
503 nir_ssa_def *packed = nir_channel(b, res_index, 1);
504 nir_ssa_def *array_max = nir_extract_u16(b, packed, nir_imm_int(b, 1));
505 nir_ssa_def *surface_index = nir_extract_u16(b, packed, nir_imm_int(b, 0));
506
507 if (state->add_bounds_checks)
508 array_index = nir_umin(b, array_index, array_max);
509
510 return nir_vec2(b, nir_iadd(b, surface_index, array_index),
511 nir_imm_int(b, 0));
512 }
513
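/* Otherwise this is one of the 64-bit formats: fetch the
 * anv_address_range_descriptor from the descriptor buffer and, if this
 * pipeline uses dynamic buffers, add the dynamic offset from the push
 * constants below.
 */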
514 nir_ssa_def *desc_addr =
515 build_desc_addr(b, NULL, desc_type, res_index, addr_format, state);
516
517 nir_ssa_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 4, 32, state);
518
519 if (state->has_dynamic_buffers) {
520 struct res_index_defs res = unpack_res_index(b, res_index);
521
522 /* This shader has dynamic offsets and we have no way of knowing
523 * (save from the dynamic offset base index) if this buffer has a
524 * dynamic offset.
525 */
526 nir_ssa_def *dyn_offset_idx =
527 nir_iadd(b, res.dyn_offset_base, res.array_index);
528 if (state->add_bounds_checks) {
529 dyn_offset_idx = nir_umin(b, dyn_offset_idx,
530 nir_imm_int(b, MAX_DYNAMIC_BUFFERS));
531 }
532
533 nir_ssa_def *dyn_load =
534 nir_load_push_constant(b, 1, 32, nir_imul_imm(b, dyn_offset_idx, 4),
535 .base = offsetof(struct anv_push_constants, dynamic_offsets),
536 .range = MAX_DYNAMIC_BUFFERS * 4);
537
538 nir_ssa_def *dynamic_offset =
539 nir_bcsel(b, nir_ieq_imm(b, res.dyn_offset_base, 0xff),
540 nir_imm_int(b, 0), dyn_load);
541
542 /* The dynamic offset gets added to the base pointer so that we
543 * have a sliding window range.
544 */
545 nir_ssa_def *base_ptr =
546 nir_pack_64_2x32(b, nir_channels(b, desc, 0x3));
547 base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
548 desc = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
549 nir_unpack_64_2x32_split_y(b, base_ptr),
550 nir_channel(b, desc, 2),
551 nir_channel(b, desc, 3));
552 }
553
554 /* The last element of the vec4 is always zero.
555 *
556 * See also struct anv_address_range_descriptor
557 */
558 return nir_vec4(b, nir_channel(b, desc, 0),
559 nir_channel(b, desc, 1),
560 nir_channel(b, desc, 2),
561 nir_imm_int(b, 0));
562 }
563
564 /** Loads descriptor memory for a variable-based deref chain
565 *
566 * The deref chain has to terminate at a variable with a descriptor_set and
567 * binding set. This is used for images, textures, and samplers.
568 */
569 static nir_ssa_def *
build_load_var_deref_descriptor_mem(nir_builder *b, nir_deref_instr *deref,
571 unsigned desc_offset,
572 unsigned num_components, unsigned bit_size,
573 struct apply_pipeline_layout_state *state)
574 {
575 nir_variable *var = nir_deref_instr_get_variable(deref);
576
577 const uint32_t set = var->data.descriptor_set;
578 const uint32_t binding = var->data.binding;
579 const struct anv_descriptor_set_binding_layout *bind_layout =
580 &state->layout->set[set].layout->binding[binding];
581
582 nir_ssa_def *array_index;
583 if (deref->deref_type != nir_deref_type_var) {
584 assert(deref->deref_type == nir_deref_type_array);
585 assert(nir_deref_instr_parent(deref)->deref_type == nir_deref_type_var);
586 assert(deref->arr.index.is_ssa);
587 array_index = deref->arr.index.ssa;
588 } else {
589 array_index = nir_imm_int(b, 0);
590 }
591
592 /* It doesn't really matter what address format we choose as everything
593 * will constant-fold nicely. Choose one that uses the actual descriptor
* buffer so we don't run into issues with index/offset assumptions.
595 */
596 const nir_address_format addr_format =
597 nir_address_format_64bit_bounded_global;
598
599 nir_ssa_def *res_index =
600 build_res_index(b, set, binding, array_index, addr_format, state);
601
602 nir_ssa_def *desc_addr =
603 build_desc_addr(b, bind_layout, bind_layout->type,
604 res_index, addr_format, state);
605
606 return build_load_descriptor_mem(b, desc_addr, desc_offset,
607 num_components, bit_size, state);
608 }
609
610 /** A recursive form of build_res_index()
611 *
612 * This recursively walks a resource [re]index chain and builds the resource
613 * index. It places the new code with the resource [re]index operation in the
614 * hopes of better CSE. This means the cursor is not where you left it when
615 * this function returns.
616 */
617 static nir_ssa_def *
build_res_index_for_chain(nir_builder *b, nir_intrinsic_instr *intrin,
619 nir_address_format addr_format,
620 uint32_t *set, uint32_t *binding,
621 struct apply_pipeline_layout_state *state)
622 {
623 if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) {
624 b->cursor = nir_before_instr(&intrin->instr);
625 assert(intrin->src[0].is_ssa);
626 *set = nir_intrinsic_desc_set(intrin);
627 *binding = nir_intrinsic_binding(intrin);
628 return build_res_index(b, *set, *binding, intrin->src[0].ssa,
629 addr_format, state);
630 } else {
631 assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex);
632 nir_intrinsic_instr *parent = nir_src_as_intrinsic(intrin->src[0]);
633 nir_ssa_def *index =
634 build_res_index_for_chain(b, parent, addr_format,
635 set, binding, state);
636
637 b->cursor = nir_before_instr(&intrin->instr);
638
639 assert(intrin->src[1].is_ssa);
640 return build_res_reindex(b, index, intrin->src[1].ssa, addr_format);
641 }
642 }
643
644 /** Builds a buffer address for a given vulkan [re]index intrinsic
645 *
646 * The cursor is not where you left it when this function returns.
647 */
648 static nir_ssa_def *
build_buffer_addr_for_idx_intrin(nir_builder *b,
650 nir_intrinsic_instr *idx_intrin,
651 nir_address_format addr_format,
652 struct apply_pipeline_layout_state *state)
653 {
654 uint32_t set = UINT32_MAX, binding = UINT32_MAX;
655 nir_ssa_def *res_index =
656 build_res_index_for_chain(b, idx_intrin, addr_format,
657 &set, &binding, state);
658
659 const struct anv_descriptor_set_binding_layout *bind_layout =
660 &state->layout->set[set].layout->binding[binding];
661
662 return build_buffer_addr_for_res_index(b, bind_layout->type,
663 res_index, addr_format, state);
664 }
665
666 /** Builds a buffer address for deref chain
667 *
668 * This assumes that you can chase the chain all the way back to the original
669 * vulkan_resource_index intrinsic.
670 *
671 * The cursor is not where you left it when this function returns.
672 */
673 static nir_ssa_def *
build_buffer_addr_for_deref(nir_builder *b, nir_deref_instr *deref,
675 nir_address_format addr_format,
676 struct apply_pipeline_layout_state *state)
677 {
678 nir_deref_instr *parent = nir_deref_instr_parent(deref);
679 if (parent) {
680 nir_ssa_def *addr =
681 build_buffer_addr_for_deref(b, parent, addr_format, state);
682
683 b->cursor = nir_before_instr(&deref->instr);
684 return nir_explicit_io_address_from_deref(b, deref, addr, addr_format);
685 }
686
687 nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
688 assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);
689
690 nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);
691
692 b->cursor = nir_before_instr(&deref->instr);
693
694 return build_buffer_addr_for_idx_intrin(b, idx_intrin, addr_format, state);
695 }
696
697 static bool
try_lower_direct_buffer_intrinsic(nir_builder *b,
699 nir_intrinsic_instr *intrin, bool is_atomic,
700 struct apply_pipeline_layout_state *state)
701 {
702 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
703 if (!nir_deref_mode_is_one_of(deref, nir_var_mem_ubo | nir_var_mem_ssbo))
704 return false;
705
706 nir_intrinsic_instr *desc = nir_deref_find_descriptor(deref, state);
707 if (desc == NULL) {
708 /* We should always be able to find the descriptor for UBO access. */
709 assert(nir_deref_mode_is_one_of(deref, nir_var_mem_ssbo));
710 return false;
711 }
712
713 nir_address_format addr_format = descriptor_address_format(desc, state);
714
715 if (nir_deref_mode_is(deref, nir_var_mem_ssbo)) {
716 /* 64-bit atomics only support A64 messages so we can't lower them to
717 * the index+offset model.
718 */
719 if (is_atomic && nir_dest_bit_size(intrin->dest) == 64 &&
720 !state->pdevice->info.has_lsc)
721 return false;
722
723 /* Normal binding table-based messages can't handle non-uniform access
724 * so we have to fall back to A64.
725 */
726 if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM)
727 return false;
728
729 if (!descriptor_has_bti(desc, state))
730 return false;
731
732 /* Rewrite to 32bit_index_offset whenever we can */
733 addr_format = nir_address_format_32bit_index_offset;
734 } else {
735 assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
736
737 /* Rewrite to 32bit_index_offset whenever we can */
738 if (descriptor_has_bti(desc, state))
739 addr_format = nir_address_format_32bit_index_offset;
740 }
741
742 nir_ssa_def *addr =
743 build_buffer_addr_for_deref(b, deref, addr_format, state);
744
745 b->cursor = nir_before_instr(&intrin->instr);
746 nir_lower_explicit_io_instr(b, intrin, addr, addr_format);
747
748 return true;
749 }
750
751 static bool
lower_load_accel_struct_desc(nir_builder *b,
753 nir_intrinsic_instr *load_desc,
754 struct apply_pipeline_layout_state *state)
755 {
756 assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);
757
758 nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);
759
760 /* It doesn't really matter what address format we choose as
761 * everything will constant-fold nicely. Choose one that uses the
762 * actual descriptor buffer.
763 */
764 const nir_address_format addr_format =
765 nir_address_format_64bit_bounded_global;
766
767 uint32_t set = UINT32_MAX, binding = UINT32_MAX;
768 nir_ssa_def *res_index =
769 build_res_index_for_chain(b, idx_intrin, addr_format,
770 &set, &binding, state);
771
772 const struct anv_descriptor_set_binding_layout *bind_layout =
773 &state->layout->set[set].layout->binding[binding];
774
775 b->cursor = nir_before_instr(&load_desc->instr);
776
777 nir_ssa_def *desc_addr =
778 build_desc_addr(b, bind_layout, bind_layout->type,
779 res_index, addr_format, state);
780
781 /* Acceleration structure descriptors are always uint64_t */
782 nir_ssa_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 1, 64, state);
783
784 assert(load_desc->dest.is_ssa);
785 assert(load_desc->dest.ssa.bit_size == 64);
786 assert(load_desc->dest.ssa.num_components == 1);
787 nir_ssa_def_rewrite_uses(&load_desc->dest.ssa, desc);
788 nir_instr_remove(&load_desc->instr);
789
790 return true;
791 }
792
793 static bool
lower_direct_buffer_instr(nir_builder *b, nir_instr *instr, void *_state)
795 {
796 struct apply_pipeline_layout_state *state = _state;
797
798 if (instr->type != nir_instr_type_intrinsic)
799 return false;
800
801 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
802 switch (intrin->intrinsic) {
803 case nir_intrinsic_load_deref:
804 case nir_intrinsic_store_deref:
805 return try_lower_direct_buffer_intrinsic(b, intrin, false, state);
806
807 case nir_intrinsic_deref_atomic_add:
808 case nir_intrinsic_deref_atomic_imin:
809 case nir_intrinsic_deref_atomic_umin:
810 case nir_intrinsic_deref_atomic_imax:
811 case nir_intrinsic_deref_atomic_umax:
812 case nir_intrinsic_deref_atomic_and:
813 case nir_intrinsic_deref_atomic_or:
814 case nir_intrinsic_deref_atomic_xor:
815 case nir_intrinsic_deref_atomic_exchange:
816 case nir_intrinsic_deref_atomic_comp_swap:
817 case nir_intrinsic_deref_atomic_fadd:
818 case nir_intrinsic_deref_atomic_fmin:
819 case nir_intrinsic_deref_atomic_fmax:
820 case nir_intrinsic_deref_atomic_fcomp_swap:
821 return try_lower_direct_buffer_intrinsic(b, intrin, true, state);
822
823 case nir_intrinsic_get_ssbo_size: {
/* The get_ssbo_size intrinsic always just takes an
825 * index/reindex intrinsic.
826 */
827 nir_intrinsic_instr *idx_intrin =
828 find_descriptor_for_index_src(intrin->src[0], state);
829 if (idx_intrin == NULL || !descriptor_has_bti(idx_intrin, state))
830 return false;
831
832 b->cursor = nir_before_instr(&intrin->instr);
833
834 /* We just checked that this is a BTI descriptor */
835 const nir_address_format addr_format =
836 nir_address_format_32bit_index_offset;
837
838 nir_ssa_def *buffer_addr =
839 build_buffer_addr_for_idx_intrin(b, idx_intrin, addr_format, state);
840
841 b->cursor = nir_before_instr(&intrin->instr);
842 nir_ssa_def *bti = nir_channel(b, buffer_addr, 0);
843
844 nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
845 nir_src_for_ssa(bti));
846 _mesa_set_add(state->lowered_instrs, intrin);
847 return true;
848 }
849
850 case nir_intrinsic_load_vulkan_descriptor:
851 if (nir_intrinsic_desc_type(intrin) ==
852 VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
853 return lower_load_accel_struct_desc(b, intrin, state);
854 return false;
855
856 default:
857 return false;
858 }
859 }
860
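/* Replace a vulkan_resource_index intrinsic with the ANV-specific resource
 * index encoding produced by build_res_index().
 */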
861 static bool
lower_res_index_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
863 struct apply_pipeline_layout_state *state)
864 {
865 b->cursor = nir_before_instr(&intrin->instr);
866
867 nir_address_format addr_format =
868 addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);
869
870 assert(intrin->src[0].is_ssa);
871 nir_ssa_def *index =
872 build_res_index(b, nir_intrinsic_desc_set(intrin),
873 nir_intrinsic_binding(intrin),
874 intrin->src[0].ssa,
875 addr_format, state);
876
877 assert(intrin->dest.is_ssa);
878 assert(intrin->dest.ssa.bit_size == index->bit_size);
879 assert(intrin->dest.ssa.num_components == index->num_components);
880 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, index);
881 nir_instr_remove(&intrin->instr);
882
883 return true;
884 }
885
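/* Replace a vulkan_resource_reindex intrinsic with an adjusted resource
 * index, see build_res_reindex().
 */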
886 static bool
lower_res_reindex_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
888 struct apply_pipeline_layout_state *state)
889 {
890 b->cursor = nir_before_instr(&intrin->instr);
891
892 nir_address_format addr_format =
893 addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);
894
895 assert(intrin->src[0].is_ssa && intrin->src[1].is_ssa);
896 nir_ssa_def *index =
897 build_res_reindex(b, intrin->src[0].ssa,
898 intrin->src[1].ssa,
899 addr_format);
900
901 assert(intrin->dest.is_ssa);
902 assert(intrin->dest.ssa.bit_size == index->bit_size);
903 assert(intrin->dest.ssa.num_components == index->num_components);
904 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, index);
905 nir_instr_remove(&intrin->instr);
906
907 return true;
908 }
909
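/* Turn a load_vulkan_descriptor intrinsic into an actual buffer address. We
 * also propagate the UBO/SSBO alignment guarantees onto the deref casts that
 * consume the descriptor so later address lowering can rely on them.
 */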
910 static bool
lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
912 struct apply_pipeline_layout_state *state)
913 {
914 b->cursor = nir_before_instr(&intrin->instr);
915
916 const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
917 nir_address_format addr_format = addr_format_for_desc_type(desc_type, state);
918
919 assert(intrin->dest.is_ssa);
920 nir_foreach_use(src, &intrin->dest.ssa) {
921 if (src->parent_instr->type != nir_instr_type_deref)
922 continue;
923
924 nir_deref_instr *cast = nir_instr_as_deref(src->parent_instr);
925 assert(cast->deref_type == nir_deref_type_cast);
926 switch (desc_type) {
927 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
928 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
929 cast->cast.align_mul = ANV_UBO_ALIGNMENT;
930 cast->cast.align_offset = 0;
931 break;
932
933 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
934 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
935 cast->cast.align_mul = ANV_SSBO_ALIGNMENT;
936 cast->cast.align_offset = 0;
937 break;
938
939 default:
940 break;
941 }
942 }
943
944 assert(intrin->src[0].is_ssa);
945 nir_ssa_def *desc =
946 build_buffer_addr_for_res_index(b, desc_type, intrin->src[0].ssa,
947 addr_format, state);
948
949 assert(intrin->dest.is_ssa);
950 assert(intrin->dest.ssa.bit_size == desc->bit_size);
951 assert(intrin->dest.ssa.num_components == desc->num_components);
952 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
953 nir_instr_remove(&intrin->instr);
954
955 return true;
956 }
957
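/* Lower get_ssbo_size for descriptors that were not already handled by
 * lower_direct_buffer_instr(). For the 64-bit formats the size is the third
 * component of the address/range descriptor; for the binding table format we
 * simply hand the back-end the binding table index.
 */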
958 static bool
lower_get_ssbo_size(nir_builder *b, nir_intrinsic_instr *intrin,
960 struct apply_pipeline_layout_state *state)
961 {
962 if (_mesa_set_search(state->lowered_instrs, intrin))
963 return false;
964
965 b->cursor = nir_before_instr(&intrin->instr);
966
967 nir_address_format addr_format =
968 addr_format_for_desc_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, state);
969
970 assert(intrin->src[0].is_ssa);
971 nir_ssa_def *desc =
972 build_buffer_addr_for_res_index(b, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
973 intrin->src[0].ssa, addr_format, state);
974
975 switch (addr_format) {
976 case nir_address_format_64bit_global_32bit_offset:
977 case nir_address_format_64bit_bounded_global: {
978 nir_ssa_def *size = nir_channel(b, desc, 2);
979 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, size);
980 nir_instr_remove(&intrin->instr);
981 break;
982 }
983
984 case nir_address_format_32bit_index_offset:
985 /* The binding table index is the first component of the address. The
986 * back-end wants a scalar binding table index source.
987 */
988 nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
989 nir_src_for_ssa(nir_channel(b, desc, 0)));
990 break;
991
992 default:
993 unreachable("Unsupported address format");
994 }
995
996 return true;
997 }
998
999 static bool
image_binding_needs_lowered_surface(nir_variable *var)
1001 {
1002 return !(var->data.access & ACCESS_NON_READABLE) &&
1003 var->data.image.format != PIPE_FORMAT_NONE;
1004 }
1005
1006 static bool
lower_image_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
1008 struct apply_pipeline_layout_state *state)
1009 {
1010 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1011 nir_variable *var = nir_deref_instr_get_variable(deref);
1012
1013 unsigned set = var->data.descriptor_set;
1014 unsigned binding = var->data.binding;
1015 unsigned binding_offset = state->set[set].surface_offsets[binding];
1016
1017 b->cursor = nir_before_instr(&intrin->instr);
1018
1019 ASSERTED const bool use_bindless = state->pdevice->has_bindless_images;
1020
1021 if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
1022 b->cursor = nir_instr_remove(&intrin->instr);
1023
1024 assert(!use_bindless); /* Otherwise our offsets would be wrong */
1025 const unsigned param = nir_intrinsic_base(intrin);
1026
1027 nir_ssa_def *desc =
1028 build_load_var_deref_descriptor_mem(b, deref, param * 16,
1029 intrin->dest.ssa.num_components,
1030 intrin->dest.ssa.bit_size, state);
1031
1032 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
1033 } else if (binding_offset > MAX_BINDING_TABLE_SIZE) {
1034 const unsigned desc_comp =
1035 image_binding_needs_lowered_surface(var) ? 1 : 0;
1036 nir_ssa_def *desc =
1037 build_load_var_deref_descriptor_mem(b, deref, 0, 2, 32, state);
1038 nir_ssa_def *handle = nir_channel(b, desc, desc_comp);
1039 nir_rewrite_image_intrinsic(intrin, handle, true);
1040 } else {
1041 unsigned array_size =
1042 state->layout->set[set].layout->binding[binding].array_size;
1043
1044 nir_ssa_def *index = NULL;
1045 if (deref->deref_type != nir_deref_type_var) {
1046 assert(deref->deref_type == nir_deref_type_array);
1047 index = nir_ssa_for_src(b, deref->arr.index, 1);
1048 if (state->add_bounds_checks)
1049 index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
1050 } else {
1051 index = nir_imm_int(b, 0);
1052 }
1053
1054 index = nir_iadd_imm(b, index, binding_offset);
1055 nir_rewrite_image_intrinsic(intrin, index, false);
1056 }
1057
1058 return true;
1059 }
1060
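/* Lower load_constant. Depending on anv_use_relocations(), the constant data
 * is loaded either through a 64-bit constant-data address provided via shader
 * relocation constants or as a UBO from the binding table slot reserved at
 * state->constants_offset.
 */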
1061 static bool
lower_load_constant(nir_builder *b, nir_intrinsic_instr *intrin,
1063 struct apply_pipeline_layout_state *state)
1064 {
1065 b->cursor = nir_instr_remove(&intrin->instr);
1066
1067 /* Any constant-offset load_constant instructions should have been removed
1068 * by constant folding.
1069 */
1070 assert(!nir_src_is_const(intrin->src[0]));
1071 nir_ssa_def *offset = nir_iadd_imm(b, nir_ssa_for_src(b, intrin->src[0], 1),
1072 nir_intrinsic_base(intrin));
1073
1074 nir_ssa_def *data;
1075 if (!anv_use_relocations(state->pdevice)) {
1076 unsigned load_size = intrin->dest.ssa.num_components *
1077 intrin->dest.ssa.bit_size / 8;
1078 unsigned load_align = intrin->dest.ssa.bit_size / 8;
1079
1080 assert(load_size < b->shader->constant_data_size);
1081 unsigned max_offset = b->shader->constant_data_size - load_size;
1082 offset = nir_umin(b, offset, nir_imm_int(b, max_offset));
1083
1084 nir_ssa_def *const_data_base_addr = nir_pack_64_2x32_split(b,
1085 nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
1086 nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
1087
1088 data = nir_load_global_constant(b, nir_iadd(b, const_data_base_addr,
1089 nir_u2u64(b, offset)),
1090 load_align,
1091 intrin->dest.ssa.num_components,
1092 intrin->dest.ssa.bit_size);
1093 } else {
1094 nir_ssa_def *index = nir_imm_int(b, state->constants_offset);
1095
1096 data = nir_load_ubo(b, intrin->num_components, intrin->dest.ssa.bit_size,
1097 index, offset,
1098 .align_mul = intrin->dest.ssa.bit_size / 8,
1099 .align_offset = 0,
1100 .range_base = nir_intrinsic_base(intrin),
1101 .range = nir_intrinsic_range(intrin));
1102 }
1103
1104 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, data);
1105
1106 return true;
1107 }
1108
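/* Rewrite a texture/sampler deref source either into a bindless handle read
 * from the descriptor buffer or into a binding-table-relative index,
 * depending on whether the binding received a table slot.
 */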
1109 static void
lower_tex_deref(nir_builder *b, nir_tex_instr *tex,
1111 nir_tex_src_type deref_src_type,
1112 unsigned *base_index, unsigned plane,
1113 struct apply_pipeline_layout_state *state)
1114 {
1115 int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
1116 if (deref_src_idx < 0)
1117 return;
1118
1119 nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
1120 nir_variable *var = nir_deref_instr_get_variable(deref);
1121
1122 unsigned set = var->data.descriptor_set;
1123 unsigned binding = var->data.binding;
1124 unsigned array_size =
1125 state->layout->set[set].layout->binding[binding].array_size;
1126
1127 unsigned binding_offset;
1128 if (deref_src_type == nir_tex_src_texture_deref) {
1129 binding_offset = state->set[set].surface_offsets[binding];
1130 } else {
1131 assert(deref_src_type == nir_tex_src_sampler_deref);
1132 binding_offset = state->set[set].sampler_offsets[binding];
1133 }
1134
1135 nir_tex_src_type offset_src_type;
1136 nir_ssa_def *index = NULL;
1137 if (binding_offset > MAX_BINDING_TABLE_SIZE) {
1138 const unsigned plane_offset =
1139 plane * sizeof(struct anv_sampled_image_descriptor);
1140
1141 nir_ssa_def *desc =
1142 build_load_var_deref_descriptor_mem(b, deref, plane_offset,
1143 2, 32, state);
1144
1145 if (deref_src_type == nir_tex_src_texture_deref) {
1146 offset_src_type = nir_tex_src_texture_handle;
1147 index = nir_channel(b, desc, 0);
1148 } else {
1149 assert(deref_src_type == nir_tex_src_sampler_deref);
1150 offset_src_type = nir_tex_src_sampler_handle;
1151 index = nir_channel(b, desc, 1);
1152 }
1153 } else {
1154 if (deref_src_type == nir_tex_src_texture_deref) {
1155 offset_src_type = nir_tex_src_texture_offset;
1156 } else {
1157 assert(deref_src_type == nir_tex_src_sampler_deref);
1158 offset_src_type = nir_tex_src_sampler_offset;
1159 }
1160
1161 *base_index = binding_offset + plane;
1162
1163 if (deref->deref_type != nir_deref_type_var) {
1164 assert(deref->deref_type == nir_deref_type_array);
1165
1166 if (nir_src_is_const(deref->arr.index)) {
1167 unsigned arr_index = MIN2(nir_src_as_uint(deref->arr.index), array_size - 1);
1168 struct anv_sampler **immutable_samplers =
1169 state->layout->set[set].layout->binding[binding].immutable_samplers;
1170 if (immutable_samplers) {
/* Arrays of YCbCr samplers are tightly packed in the binding
1172 * tables, compute the offset of an element in the array by
1173 * adding the number of planes of all preceding elements.
1174 */
1175 unsigned desc_arr_index = 0;
1176 for (int i = 0; i < arr_index; i++)
1177 desc_arr_index += immutable_samplers[i]->n_planes;
1178 *base_index += desc_arr_index;
1179 } else {
1180 *base_index += arr_index;
1181 }
1182 } else {
1183 /* From VK_KHR_sampler_ycbcr_conversion:
1184 *
1185 * If sampler Y’CBCR conversion is enabled, the combined image
1186 * sampler must be indexed only by constant integral expressions
1187 * when aggregated into arrays in shader code, irrespective of
1188 * the shaderSampledImageArrayDynamicIndexing feature.
1189 */
1190 assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1);
1191
1192 index = nir_ssa_for_src(b, deref->arr.index, 1);
1193
1194 if (state->add_bounds_checks)
1195 index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
1196 }
1197 }
1198 }
1199
1200 if (index) {
1201 nir_instr_rewrite_src(&tex->instr, &tex->src[deref_src_idx].src,
1202 nir_src_for_ssa(index));
1203 tex->src[deref_src_idx].src_type = offset_src_type;
1204 } else {
1205 nir_tex_instr_remove_src(tex, deref_src_idx);
1206 }
1207 }
1208
1209 static uint32_t
tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
1211 {
1212 int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
1213 if (plane_src_idx < 0)
1214 return 0;
1215
1216 unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src);
1217
1218 nir_tex_instr_remove_src(tex, plane_src_idx);
1219
1220 return plane;
1221 }
1222
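/* Select srcs[idx] using a balanced tree of bcsel instructions (a binary
 * search on idx) instead of a linear chain.
 */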
1223 static nir_ssa_def *
build_def_array_select(nir_builder *b, nir_ssa_def **srcs, nir_ssa_def *idx,
1225 unsigned start, unsigned end)
1226 {
1227 if (start == end - 1) {
1228 return srcs[start];
1229 } else {
1230 unsigned mid = start + (end - start) / 2;
1231 return nir_bcsel(b, nir_ilt(b, idx, nir_imm_int(b, mid)),
1232 build_def_array_select(b, srcs, idx, start, mid),
1233 build_def_array_select(b, srcs, idx, mid, end));
1234 }
1235 }
1236
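/* On gfx7 (Ivy Bridge/Bay Trail) the texture swizzle has to be applied in
 * the shader (see lower_tex()); read the swizzle from the descriptor and
 * rewrite the texture result accordingly.
 */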
1237 static void
lower_gfx7_tex_swizzle(nir_builder *b, nir_tex_instr *tex, unsigned plane,
1239 struct apply_pipeline_layout_state *state)
1240 {
1241 assert(state->pdevice->info.verx10 == 70);
1242 if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ||
1243 nir_tex_instr_is_query(tex) ||
1244 tex->op == nir_texop_tg4 || /* We can't swizzle TG4 */
1245 (tex->is_shadow && tex->is_new_style_shadow))
1246 return;
1247
1248 int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
1249 assert(deref_src_idx >= 0);
1250
1251 nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
1252 nir_variable *var = nir_deref_instr_get_variable(deref);
1253
1254 unsigned set = var->data.descriptor_set;
1255 unsigned binding = var->data.binding;
1256 const struct anv_descriptor_set_binding_layout *bind_layout =
1257 &state->layout->set[set].layout->binding[binding];
1258
1259 if ((bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) == 0)
1260 return;
1261
1262 b->cursor = nir_before_instr(&tex->instr);
1263
1264 const unsigned plane_offset =
1265 plane * sizeof(struct anv_texture_swizzle_descriptor);
1266 nir_ssa_def *swiz =
1267 build_load_var_deref_descriptor_mem(b, deref, plane_offset,
1268 1, 32, state);
1269
1270 b->cursor = nir_after_instr(&tex->instr);
1271
1272 assert(tex->dest.ssa.bit_size == 32);
1273 assert(tex->dest.ssa.num_components == 4);
1274
1275 /* Initializing to undef is ok; nir_opt_undef will clean it up. */
1276 nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
1277 nir_ssa_def *comps[8];
1278 for (unsigned i = 0; i < ARRAY_SIZE(comps); i++)
1279 comps[i] = undef;
1280
1281 comps[ISL_CHANNEL_SELECT_ZERO] = nir_imm_int(b, 0);
1282 if (nir_alu_type_get_base_type(tex->dest_type) == nir_type_float)
1283 comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_float(b, 1);
1284 else
1285 comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_int(b, 1);
1286 comps[ISL_CHANNEL_SELECT_RED] = nir_channel(b, &tex->dest.ssa, 0);
1287 comps[ISL_CHANNEL_SELECT_GREEN] = nir_channel(b, &tex->dest.ssa, 1);
1288 comps[ISL_CHANNEL_SELECT_BLUE] = nir_channel(b, &tex->dest.ssa, 2);
1289 comps[ISL_CHANNEL_SELECT_ALPHA] = nir_channel(b, &tex->dest.ssa, 3);
1290
1291 nir_ssa_def *swiz_comps[4];
1292 for (unsigned i = 0; i < 4; i++) {
1293 nir_ssa_def *comp_swiz = nir_extract_u8(b, swiz, nir_imm_int(b, i));
1294 swiz_comps[i] = build_def_array_select(b, comps, comp_swiz, 0, 8);
1295 }
1296 nir_ssa_def *swiz_tex_res = nir_vec(b, swiz_comps, 4);
1297
1298 /* Rewrite uses before we insert so we don't rewrite this use */
1299 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa,
1300 swiz_tex_res,
1301 swiz_tex_res->parent_instr);
1302 }
1303
1304 static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
1306 struct apply_pipeline_layout_state *state)
1307 {
1308 unsigned plane = tex_instr_get_and_remove_plane_src(tex);
1309
1310 /* On Ivy Bridge and Bay Trail, we have to swizzle in the shader. Do this
1311 * before we lower the derefs away so we can still find the descriptor.
1312 */
1313 if (state->pdevice->info.verx10 == 70)
1314 lower_gfx7_tex_swizzle(b, tex, plane, state);
1315
1316 b->cursor = nir_before_instr(&tex->instr);
1317
1318 lower_tex_deref(b, tex, nir_tex_src_texture_deref,
1319 &tex->texture_index, plane, state);
1320
1321 lower_tex_deref(b, tex, nir_tex_src_sampler_deref,
1322 &tex->sampler_index, plane, state);
1323
1324 return true;
1325 }
1326
1327 static bool
lower_ray_query_globals(nir_builder *b, nir_intrinsic_instr *intrin,
1329 struct apply_pipeline_layout_state *state)
1330 {
1331 b->cursor = nir_instr_remove(&intrin->instr);
1332
1333 nir_ssa_def *rq_globals =
1334 nir_load_push_constant(b, 1, 64, nir_imm_int(b, 0),
1335 .base = offsetof(struct anv_push_constants, ray_query_globals),
1336 .range = sizeof_field(struct anv_push_constants, ray_query_globals));
1337 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, rq_globals);
1338
1339 return true;
1340 }
1341
1342 static bool
apply_pipeline_layout(nir_builder *b, nir_instr *instr, void *_state)
1344 {
1345 struct apply_pipeline_layout_state *state = _state;
1346
1347 switch (instr->type) {
1348 case nir_instr_type_intrinsic: {
1349 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1350 switch (intrin->intrinsic) {
1351 case nir_intrinsic_vulkan_resource_index:
1352 return lower_res_index_intrinsic(b, intrin, state);
1353 case nir_intrinsic_vulkan_resource_reindex:
1354 return lower_res_reindex_intrinsic(b, intrin, state);
1355 case nir_intrinsic_load_vulkan_descriptor:
1356 return lower_load_vulkan_descriptor(b, intrin, state);
1357 case nir_intrinsic_get_ssbo_size:
1358 return lower_get_ssbo_size(b, intrin, state);
1359 case nir_intrinsic_image_deref_load:
1360 case nir_intrinsic_image_deref_store:
1361 case nir_intrinsic_image_deref_atomic_add:
1362 case nir_intrinsic_image_deref_atomic_imin:
1363 case nir_intrinsic_image_deref_atomic_umin:
1364 case nir_intrinsic_image_deref_atomic_imax:
1365 case nir_intrinsic_image_deref_atomic_umax:
1366 case nir_intrinsic_image_deref_atomic_and:
1367 case nir_intrinsic_image_deref_atomic_or:
1368 case nir_intrinsic_image_deref_atomic_xor:
1369 case nir_intrinsic_image_deref_atomic_exchange:
1370 case nir_intrinsic_image_deref_atomic_comp_swap:
1371 case nir_intrinsic_image_deref_atomic_fadd:
1372 case nir_intrinsic_image_deref_size:
1373 case nir_intrinsic_image_deref_samples:
1374 case nir_intrinsic_image_deref_load_param_intel:
1375 case nir_intrinsic_image_deref_load_raw_intel:
1376 case nir_intrinsic_image_deref_store_raw_intel:
1377 return lower_image_intrinsic(b, intrin, state);
1378 case nir_intrinsic_load_constant:
1379 return lower_load_constant(b, intrin, state);
1380 case nir_intrinsic_load_ray_query_global_intel:
1381 return lower_ray_query_globals(b, intrin, state);
1382 default:
1383 return false;
1384 }
1385 break;
1386 }
1387 case nir_instr_type_tex:
1388 return lower_tex(b, nir_instr_as_tex(instr), state);
1389 default:
1390 return false;
1391 }
1392 }
1393
1394 struct binding_info {
1395 uint32_t binding;
1396 uint8_t set;
1397 uint16_t score;
1398 };
1399
1400 static int
compare_binding_infos(const void *_a, const void *_b)
1402 {
1403 const struct binding_info *a = _a, *b = _b;
1404 if (a->score != b->score)
1405 return b->score - a->score;
1406
1407 if (a->set != b->set)
1408 return a->set - b->set;
1409
1410 return a->binding - b->binding;
1411 }
1412
1413 void
anv_nir_apply_pipeline_layout(nir_shader *shader,
1415 const struct anv_physical_device *pdevice,
1416 bool robust_buffer_access,
1417 const struct anv_pipeline_layout *layout,
1418 struct anv_pipeline_bind_map *map)
1419 {
1420 void *mem_ctx = ralloc_context(NULL);
1421
1422 struct apply_pipeline_layout_state state = {
1423 .pdevice = pdevice,
1424 .layout = layout,
1425 .add_bounds_checks = robust_buffer_access,
1426 .desc_addr_format =
1427 brw_shader_stage_requires_bindless_resources(shader->info.stage) ?
1428 nir_address_format_64bit_global_32bit_offset :
1429 nir_address_format_32bit_index_offset,
1430 .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_buffer_access),
1431 .ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_buffer_access),
1432 .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
1433 };
1434
1435 for (unsigned s = 0; s < layout->num_sets; s++) {
1436 const unsigned count = layout->set[s].layout->binding_count;
1437 state.set[s].use_count = rzalloc_array(mem_ctx, uint8_t, count);
1438 state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
1439 state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
1440 }
1441
1442 nir_shader_instructions_pass(shader, get_used_bindings,
1443 nir_metadata_all, &state);
1444
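/* Allocate a binding table surface for each descriptor set whose descriptor
 * buffer is actually used, or mark the set with BINDLESS_OFFSET when
 * descriptors are not accessed through the binding table.
 */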
1445 for (unsigned s = 0; s < layout->num_sets; s++) {
1446 if (state.desc_addr_format != nir_address_format_32bit_index_offset) {
1447 state.set[s].desc_offset = BINDLESS_OFFSET;
1448 } else if (state.set[s].desc_buffer_used) {
1449 map->surface_to_descriptor[map->surface_count] =
1450 (struct anv_pipeline_binding) {
1451 .set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
1452 .index = s,
1453 };
1454 state.set[s].desc_offset = map->surface_count;
1455 map->surface_count++;
1456 }
1457 }
1458
1459 if (state.uses_constants && anv_use_relocations(pdevice)) {
1460 state.constants_offset = map->surface_count;
1461 map->surface_to_descriptor[map->surface_count].set =
1462 ANV_DESCRIPTOR_SET_SHADER_CONSTANTS;
1463 map->surface_count++;
1464 }
1465
   unsigned used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         used_binding_count++;
      }
   }

   struct binding_info *infos =
      rzalloc_array(mem_ctx, struct binding_info, used_binding_count);
   used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      const struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         const struct anv_descriptor_set_binding_layout *binding =
            &layout->set[set].layout->binding[b];

         /* Do a fixed-point calculation to generate a score based on the
          * number of uses and the binding array size. We shift by 7 instead
          * of 8 because we're going to use the top bit below to give
          * everything which does not support bindless a much higher
          * priority than things which do.
          */
         uint16_t score = ((uint16_t)state.set[set].use_count[b] << 7) /
                          binding->array_size;
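         /* As a worked example (hypothetical numbers): a binding used 4
          * times with array_size 2 scores (4 << 7) / 2 = 256 here; if it
          * also fails either anv_descriptor_supports_bindless() check
          * below, bit 15 pushes it ahead of every bindless-capable binding
          * regardless of use count.
          */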

         /* If the descriptor type doesn't support bindless then put it at the
          * beginning so we guarantee it gets a slot.
          */
         if (!anv_descriptor_supports_bindless(pdevice, binding, true) ||
             !anv_descriptor_supports_bindless(pdevice, binding, false))
            score |= 1 << 15;

         infos[used_binding_count++] = (struct binding_info) {
            .set = set,
            .binding = b,
            .score = score,
         };
      }
   }

   /* Order the binding infos based on score with highest scores first. If
    * scores are equal, we then order by set and binding.
    */
   qsort(infos, used_binding_count, sizeof(struct binding_info),
         compare_binding_infos);

   for (unsigned i = 0; i < used_binding_count; i++) {
      unsigned set = infos[i].set, b = infos[i].binding;
      const struct anv_descriptor_set_binding_layout *binding =
         &layout->set[set].layout->binding[b];

      const uint32_t array_size = binding->array_size;

      if (binding->dynamic_offset_index >= 0)
         state.has_dynamic_buffers = true;

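      /* Assign binding-table surface slots (or fall back to bindless) for
       * descriptors in this binding that carry surface state.
       */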
      if (binding->data & ANV_DESCRIPTOR_SURFACE_STATE) {
         if (map->surface_count + array_size > MAX_BINDING_TABLE_SIZE ||
             anv_descriptor_requires_bindless(pdevice, binding, false) ||
             brw_shader_stage_requires_bindless_resources(shader->info.stage)) {
            /* If this descriptor doesn't fit in the binding table or if it
             * requires bindless for some reason, flag it as bindless.
             */
            assert(anv_descriptor_supports_bindless(pdevice, binding, false));
            state.set[set].surface_offsets[b] = BINDLESS_OFFSET;
         } else {
            state.set[set].surface_offsets[b] = map->surface_count;
            if (binding->dynamic_offset_index < 0) {
               struct anv_sampler **samplers = binding->immutable_samplers;
               for (unsigned i = 0; i < binding->array_size; i++) {
                  uint8_t planes = samplers ? samplers[i]->n_planes : 1;
                  for (uint8_t p = 0; p < planes; p++) {
                     map->surface_to_descriptor[map->surface_count++] =
                        (struct anv_pipeline_binding) {
                           .set = set,
                           .index = binding->descriptor_index + i,
                           .plane = p,
                        };
                  }
               }
            } else {
               for (unsigned i = 0; i < binding->array_size; i++) {
                  map->surface_to_descriptor[map->surface_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .index = binding->descriptor_index + i,
                        .dynamic_offset_index =
                           layout->set[set].dynamic_offset_start +
                           binding->dynamic_offset_index + i,
                     };
               }
            }
         }
         assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
      }

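      /* Likewise, assign sampler-table slots (or fall back to bindless)
       * for descriptors in this binding that carry sampler state.
       */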
      if (binding->data & ANV_DESCRIPTOR_SAMPLER_STATE) {
         if (map->sampler_count + array_size > MAX_SAMPLER_TABLE_SIZE ||
             anv_descriptor_requires_bindless(pdevice, binding, true) ||
             brw_shader_stage_requires_bindless_resources(shader->info.stage)) {
            /* If this descriptor doesn't fit in the binding table or if it
             * requires bindless for some reason, flag it as bindless.
             *
             * We also make large sampler arrays bindless because we can avoid
             * using indirect sends thanks to bindless samplers being packed
             * less tightly than the sampler table.
             */
            assert(anv_descriptor_supports_bindless(pdevice, binding, true));
            state.set[set].sampler_offsets[b] = BINDLESS_OFFSET;
         } else {
            state.set[set].sampler_offsets[b] = map->sampler_count;
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->sampler_to_descriptor[map->sampler_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .index = binding->descriptor_index + i,
                        .plane = p,
                     };
               }
            }
         }
      }
   }

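   /* For storage images that landed in the binding table, record on each
    * entry whether its storage accesses need the lowered surface state.
    */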
   nir_foreach_image_variable(var, shader) {
      const uint32_t set = var->data.descriptor_set;
      const uint32_t binding = var->data.binding;
      const struct anv_descriptor_set_binding_layout *bind_layout =
         &layout->set[set].layout->binding[binding];
      const uint32_t array_size = bind_layout->array_size;

      if (state.set[set].use_count[binding] == 0)
         continue;

      if (state.set[set].surface_offsets[binding] >= MAX_BINDING_TABLE_SIZE)
         continue;

      struct anv_pipeline_binding *pipe_binding =
         &map->surface_to_descriptor[state.set[set].surface_offsets[binding]];
      for (unsigned i = 0; i < array_size; i++) {
         assert(pipe_binding[i].set == set);
         assert(pipe_binding[i].index == bind_layout->descriptor_index + i);

         pipe_binding[i].lowered_storage_surface =
            image_binding_needs_lowered_surface(var);
      }
   }

   /* Before we do the normal lowering, we look for any SSBO operations
    * that we can lower to the BTI model and lower them up-front. The BTI
    * model can perform better than the A64 model for a couple of reasons:
    *
    * 1. 48-bit address calculations are potentially expensive and using
    *    the BTI model lets us simply compute 32-bit offsets and the
    *    hardware adds the 64-bit surface base address.
    *
    * 2. The BTI messages, because they use surface states, do bounds
    *    checking for us. With the A64 model, we have to do our own
    *    bounds checking and this means wider pointers and extra
    *    calculations and branching in the shader.
    *
    * The solution to both of these is to convert things to the BTI model
    * opportunistically. We need to do this as a pre-pass for two reasons:
    *
    * 1. The BTI model requires nir_address_format_32bit_index_offset
    *    pointers which are not the same type as the pointers needed for
    *    the A64 model. Because all our derefs are set up for the A64
    *    model (in case we have variable pointers), we have to crawl all
    *    the way back to the vulkan_resource_index intrinsic and build a
    *    completely fresh index+offset calculation.
    *
    * 2. Because the variable-pointers-capable lowering that we do as part
    *    of apply_pipeline_layout_block is destructive (it really has to
    *    be to handle variable pointers properly), we've lost the deref
    *    information by the time we get to the load/store/atomic
    *    intrinsics in that pass.
    */
   nir_shader_instructions_pass(shader, lower_direct_buffer_instr,
                                nir_metadata_block_index |
                                nir_metadata_dominance,
                                &state);
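
   /* Roughly speaking, after this pass a direct (non-variable-pointer)
    * SSBO access is expressed as a binding-table index plus a 32-bit byte
    * offset instead of a full 64-bit address computed in the shader.
    */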

   /* We just got rid of all the direct access. Delete it so it's not in the
    * way when we do our indirect lowering.
    */
   nir_opt_dce(shader);

   nir_shader_instructions_pass(shader, apply_pipeline_layout,
                                nir_metadata_block_index |
                                nir_metadata_dominance,
                                &state);

   ralloc_free(mem_ctx);

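   /* Fully bindless stages are expected not to use the binding or sampler
    * tables at all.
    */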
   if (brw_shader_stage_is_bindless(shader->info.stage)) {
      assert(map->surface_count == 0);
      assert(map->sampler_count == 0);
   }

   /* Now that we're done computing the surface and sampler portions of the
    * bind map, hash them. This lets us quickly determine if the actual
    * mapping has changed and not just a no-op pipeline change.
    */
   _mesa_sha1_compute(map->surface_to_descriptor,
                      map->surface_count * sizeof(struct anv_pipeline_binding),
                      map->surface_sha1);
   _mesa_sha1_compute(map->sampler_to_descriptor,
                      map->sampler_count * sizeof(struct anv_pipeline_binding),
                      map->sampler_sha1);
}