1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "anv_nir.h"
25 #include "nir/nir_builder.h"
26 #include "compiler/elk/elk_nir.h"
27 #include "util/mesa-sha1.h"
28 #include "util/set.h"
29
30 /* Sampler tables don't actually have a maximum size but we pick one just so
31 * that we don't end up emitting too much state on-the-fly.
32 */
33 #define MAX_SAMPLER_TABLE_SIZE 128
34 #define BINDLESS_OFFSET 255
35
36 #define sizeof_field(type, field) sizeof(((type *)0)->field)
37
38 struct apply_pipeline_layout_state {
39 const struct anv_physical_device *pdevice;
40
41 const struct anv_pipeline_layout *layout;
42 nir_address_format ssbo_addr_format;
43 nir_address_format ubo_addr_format;
44
45 /* Place to flag lowered instructions so we don't lower them twice */
46 struct set *lowered_instrs;
47
48 bool uses_constants;
49 bool has_dynamic_buffers;
50 uint8_t constants_offset;
51 struct {
52 bool desc_buffer_used;
53 uint8_t desc_offset;
54
55 uint8_t *use_count;
56 uint8_t *surface_offsets;
57 uint8_t *sampler_offsets;
58 } set[MAX_SETS];
59 };
60
61 static nir_address_format
62 addr_format_for_desc_type(VkDescriptorType desc_type,
63 struct apply_pipeline_layout_state *state)
64 {
65 switch (desc_type) {
66 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
67 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
68 return state->ssbo_addr_format;
69
70 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
71 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
72 return state->ubo_addr_format;
73
74 case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
75 return nir_address_format_32bit_index_offset;
76
77 default:
78 unreachable("Unsupported descriptor type");
79 }
80 }
81
82 static void
83 add_binding(struct apply_pipeline_layout_state *state,
84 uint32_t set, uint32_t binding)
85 {
86 const struct anv_descriptor_set_binding_layout *bind_layout =
87 &state->layout->set[set].layout->binding[binding];
88
89 if (state->set[set].use_count[binding] < UINT8_MAX)
90 state->set[set].use_count[binding]++;
91
92 /* Only flag the descriptor buffer as used if there's actually data for
93 * this binding. This lets us be lazy and call this function constantly
94 * without worrying about unnecessarily enabling the buffer.
95 */
96 if (bind_layout->descriptor_stride)
97 state->set[set].desc_buffer_used = true;
98 }
99
100 static void
101 add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src)
102 {
103 nir_deref_instr *deref = nir_src_as_deref(src);
104 nir_variable *var = nir_deref_instr_get_variable(deref);
105 add_binding(state, var->data.descriptor_set, var->data.binding);
106 }
107
108 static void
109 add_tex_src_binding(struct apply_pipeline_layout_state *state,
110 nir_tex_instr *tex, nir_tex_src_type deref_src_type)
111 {
112 int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
113 if (deref_src_idx < 0)
114 return;
115
116 add_deref_src_binding(state, tex->src[deref_src_idx].src);
117 }
118
119 static bool
120 get_used_bindings(UNUSED nir_builder *_b, nir_instr *instr, void *_state)
121 {
122 struct apply_pipeline_layout_state *state = _state;
123
124 switch (instr->type) {
125 case nir_instr_type_intrinsic: {
126 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
127 switch (intrin->intrinsic) {
128 case nir_intrinsic_vulkan_resource_index:
129 add_binding(state, nir_intrinsic_desc_set(intrin),
130 nir_intrinsic_binding(intrin));
131 break;
132
133 case nir_intrinsic_image_deref_load:
134 case nir_intrinsic_image_deref_store:
135 case nir_intrinsic_image_deref_atomic:
136 case nir_intrinsic_image_deref_atomic_swap:
137 case nir_intrinsic_image_deref_size:
138 case nir_intrinsic_image_deref_samples:
139 case nir_intrinsic_image_deref_load_param_intel:
140 case nir_intrinsic_image_deref_load_raw_intel:
141 case nir_intrinsic_image_deref_store_raw_intel:
142 add_deref_src_binding(state, intrin->src[0]);
143 break;
144
145 case nir_intrinsic_load_constant:
146 state->uses_constants = true;
147 break;
148
149 default:
150 break;
151 }
152 break;
153 }
154 case nir_instr_type_tex: {
155 nir_tex_instr *tex = nir_instr_as_tex(instr);
156 add_tex_src_binding(state, tex, nir_tex_src_texture_deref);
157 add_tex_src_binding(state, tex, nir_tex_src_sampler_deref);
158 break;
159 }
160 default:
161 break;
162 }
163
164 return false;
165 }
166
167 static nir_intrinsic_instr *
168 find_descriptor_for_index_src(nir_src src,
169 struct apply_pipeline_layout_state *state)
170 {
171 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);
172
173 while (intrin && intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex)
174 intrin = nir_src_as_intrinsic(intrin->src[0]);
175
176 if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
177 return NULL;
178
179 return intrin;
180 }
181
182 static bool
183 descriptor_has_bti(nir_intrinsic_instr *intrin,
184 struct apply_pipeline_layout_state *state)
185 {
186 assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
187
188 uint32_t set = nir_intrinsic_desc_set(intrin);
189 uint32_t binding = nir_intrinsic_binding(intrin);
190 const struct anv_descriptor_set_binding_layout *bind_layout =
191 &state->layout->set[set].layout->binding[binding];
192
193 uint32_t surface_index;
194 if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM)
195 surface_index = state->set[set].desc_offset;
196 else
197 surface_index = state->set[set].surface_offsets[binding];
198
199 /* Only lower to a BTI message if we have a valid binding table index. */
200 return surface_index < MAX_BINDING_TABLE_SIZE;
201 }
202
203 static nir_address_format
204 descriptor_address_format(nir_intrinsic_instr *intrin,
205 struct apply_pipeline_layout_state *state)
206 {
207 assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
208
209 return addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);
210 }
211
212 static nir_intrinsic_instr *
213 nir_deref_find_descriptor(nir_deref_instr *deref,
214 struct apply_pipeline_layout_state *state)
215 {
216 while (1) {
217 /* Nothing we will use this on has a variable */
218 assert(deref->deref_type != nir_deref_type_var);
219
220 nir_deref_instr *parent = nir_src_as_deref(deref->parent);
221 if (!parent)
222 break;
223
224 deref = parent;
225 }
226 assert(deref->deref_type == nir_deref_type_cast);
227
228 nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent);
229 if (!intrin || intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
230 return NULL;
231
232 return find_descriptor_for_index_src(intrin->src[0], state);
233 }
234
235 static nir_def *
236 build_load_descriptor_mem(nir_builder *b,
237 nir_def *desc_addr, unsigned desc_offset,
238 unsigned num_components, unsigned bit_size,
239 struct apply_pipeline_layout_state *state)
240
241 {
242 nir_def *surface_index = nir_channel(b, desc_addr, 0);
243 nir_def *offset32 =
244 nir_iadd_imm(b, nir_channel(b, desc_addr, 1), desc_offset);
245
246 return nir_load_ubo(b, num_components, bit_size,
247 surface_index, offset32,
248 .align_mul = 8,
249 .align_offset = desc_offset % 8,
250 .range_base = 0,
251 .range = ~0);
252 }
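/* A rough usage sketch (names as used later in this pass): desc_addr is the
 * vec2 built by build_desc_addr() below, where component 0 is the binding
 * table surface index of the set's descriptor buffer and component 1 is a
 * byte offset into that buffer.  Loading, say, one dword at offset 32 of a
 * descriptor would look like
 *
 *    nir_def *dw = build_load_descriptor_mem(b, desc_addr, 32, 1, 32, state);
 *
 * and ends up as a load_ubo from that surface with an 8-byte align_mul.
 */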
253
254 /** Build a Vulkan resource index
255 *
256 * A "resource index" is the term used by our SPIR-V parser and the relevant
257 * NIR intrinsics for a reference into a descriptor set. It acts much like a
258 * deref in NIR except that it accesses opaque descriptors instead of memory.
259 *
260 * Coming out of SPIR-V, both the resource indices (in the form of
261 * vulkan_resource_[re]index intrinsics) and the memory derefs (in the form
262 * of nir_deref_instr) use the same vector component/bit size. The meaning
263 * of those values for memory derefs (nir_deref_instr) is given by the
264 * nir_address_format associated with the descriptor type. For resource
265 * indices, it's an encoding entirely internal to ANV which describes, in some
266 * sense, the address of the descriptor. Thanks to the NIR/SPIR-V rules, it
267 * must be packed into the same size SSA values as a memory address. For this
268 * reason, the actual encoding may depend both on the address format for
269 * memory derefs and the descriptor address format.
270 *
271 * The load_vulkan_descriptor intrinsic exists to provide a transition point
272 * between these two forms of derefs: descriptor and memory.
273 */
274 static nir_def *
275 build_res_index(nir_builder *b, uint32_t set, uint32_t binding,
276 nir_def *array_index, nir_address_format addr_format,
277 struct apply_pipeline_layout_state *state)
278 {
279 const struct anv_descriptor_set_binding_layout *bind_layout =
280 &state->layout->set[set].layout->binding[binding];
281
282 uint32_t array_size = bind_layout->array_size;
283
284 switch (addr_format) {
285 case nir_address_format_64bit_global_32bit_offset:
286 case nir_address_format_64bit_bounded_global: {
287 assert(state->set[set].desc_offset < MAX_BINDING_TABLE_SIZE);
288 uint32_t set_idx = state->set[set].desc_offset;
289
290 assert(bind_layout->dynamic_offset_index < MAX_DYNAMIC_BUFFERS);
291 uint32_t dynamic_offset_index = 0xff; /* No dynamic offset */
292 if (bind_layout->dynamic_offset_index >= 0) {
293 dynamic_offset_index =
294 state->layout->set[set].dynamic_offset_start +
295 bind_layout->dynamic_offset_index;
296 }
297
298 const uint32_t packed = (bind_layout->descriptor_stride << 16) | (set_idx << 8) | dynamic_offset_index;
299
300 return nir_vec4(b, nir_imm_int(b, packed),
301 nir_imm_int(b, bind_layout->descriptor_offset),
302 nir_imm_int(b, array_size - 1),
303 array_index);
304 }
305
306 case nir_address_format_32bit_index_offset: {
307 if (bind_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
308 uint32_t surface_index = state->set[set].desc_offset;
309 return nir_imm_ivec2(b, surface_index,
310 bind_layout->descriptor_offset);
311 } else {
312 uint32_t surface_index = state->set[set].surface_offsets[binding];
313 assert(array_size > 0 && array_size <= UINT16_MAX);
314 assert(surface_index <= UINT16_MAX);
315 uint32_t packed = ((array_size - 1) << 16) | surface_index;
316 return nir_vec2(b, array_index, nir_imm_int(b, packed));
317 }
318 }
319
320 default:
321 unreachable("Unsupported address format");
322 }
323 }
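/* Summarizing the encodings built above (derived from the code, kept here
 * for reference when reading unpack_res_index() below):
 *
 *   64bit_global_32bit_offset / 64bit_bounded_global, vec4 of 32-bit:
 *     comp 0: descriptor_stride << 16 | descriptor buffer index << 8 |
 *             dynamic offset index (0xff means no dynamic offset)
 *     comp 1: descriptor_offset of element 0 of the binding
 *     comp 2: array_size - 1 (used to clamp the array index)
 *     comp 3: run-time array index
 *
 *   32bit_index_offset (non-inline), vec2 of 32-bit:
 *     comp 0: run-time array index
 *     comp 1: (array_size - 1) << 16 | binding table surface index
 */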
324
325 struct res_index_defs {
326 nir_def *set_idx;
327 nir_def *dyn_offset_base;
328 nir_def *desc_offset_base;
329 nir_def *array_index;
330 nir_def *desc_stride;
331 };
332
333 static struct res_index_defs
334 unpack_res_index(nir_builder *b, nir_def *index)
335 {
336 struct res_index_defs defs;
337
338 nir_def *packed = nir_channel(b, index, 0);
339 defs.desc_stride = nir_extract_u8(b, packed, nir_imm_int(b, 2));
340 defs.set_idx = nir_extract_u8(b, packed, nir_imm_int(b, 1));
341 defs.dyn_offset_base = nir_extract_u8(b, packed, nir_imm_int(b, 0));
342
343 defs.desc_offset_base = nir_channel(b, index, 1);
344 defs.array_index = nir_umin(b, nir_channel(b, index, 2),
345 nir_channel(b, index, 3));
346
347 return defs;
348 }
349
350 /** Adjust a Vulkan resource index
351 *
352 * This is the equivalent of nir_deref_type_ptr_as_array for resource indices.
353 * For array descriptors, it allows us to adjust the array index. Thanks to
354 * variable pointers, we cannot always fold this re-index operation into the
355 * vulkan_resource_index intrinsic and we have to do it based on nothing but
356 * the address format.
357 */
358 static nir_def *
359 build_res_reindex(nir_builder *b, nir_def *orig, nir_def *delta,
360 nir_address_format addr_format)
361 {
362 switch (addr_format) {
363 case nir_address_format_64bit_global_32bit_offset:
364 case nir_address_format_64bit_bounded_global:
365 return nir_vec4(b, nir_channel(b, orig, 0),
366 nir_channel(b, orig, 1),
367 nir_channel(b, orig, 2),
368 nir_iadd(b, nir_channel(b, orig, 3), delta));
369
370 case nir_address_format_32bit_index_offset:
371 return nir_vec2(b, nir_iadd(b, nir_channel(b, orig, 0), delta),
372 nir_channel(b, orig, 1));
373
374 default:
375 unreachable("Unhandled address format");
376 }
377 }
378
379 /** Get the address for a descriptor given its resource index
380 *
381 * Because of the re-indexing operations, we can't bounds check descriptor
382 * array access until we have the final index. That means we end up doing the
383 * bounds check here, if needed. See unpack_res_index() for more details.
384 *
385 * This function takes both a bind_layout and a desc_type which are used to
386 * determine the descriptor stride for array descriptors. The bind_layout is
387 * optional for buffer descriptor types.
388 */
389 static nir_def *
390 build_desc_addr(nir_builder *b,
391 const struct anv_descriptor_set_binding_layout *bind_layout,
392 const VkDescriptorType desc_type,
393 nir_def *index, nir_address_format addr_format,
394 struct apply_pipeline_layout_state *state)
395 {
396 switch (addr_format) {
397 case nir_address_format_64bit_global_32bit_offset:
398 case nir_address_format_64bit_bounded_global: {
399 struct res_index_defs res = unpack_res_index(b, index);
400
401 nir_def *desc_offset = res.desc_offset_base;
402 if (desc_type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
403 /* Compute the actual descriptor offset. For inline uniform blocks,
404 * the array index is ignored as they are only allowed to be a single
405 * descriptor (not an array) and there is no concept of a "stride".
406 *
407 */
408 desc_offset =
409 nir_iadd(b, desc_offset, nir_imul(b, res.array_index, res.desc_stride));
410 }
411
412 return nir_vec2(b, res.set_idx, desc_offset);
413 }
414
415 case nir_address_format_32bit_index_offset:
416 assert(desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK);
417 return index;
418
419 default:
420 unreachable("Unhandled address format");
421 }
422 }
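/* A small worked example with made-up numbers: for a binding whose
 * descriptor_offset is 64 and whose descriptor_stride is 32, element 3 of
 * the array resolves to desc_offset = 64 + 3 * 32 = 160, so the result is
 * vec2(descriptor buffer surface index, 160), ready to be fed to
 * build_load_descriptor_mem().
 */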
423
424 /** Convert a Vulkan resource index into a buffer address
425 *
426 * In some cases, this does a memory load from the descriptor set and, in
427 * others, it simply converts from one form to another.
428 *
429 * See build_res_index for details about each resource index format.
430 */
431 static nir_def *
432 build_buffer_addr_for_res_index(nir_builder *b,
433 const VkDescriptorType desc_type,
434 nir_def *res_index,
435 nir_address_format addr_format,
436 struct apply_pipeline_layout_state *state)
437 {
438 if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
439 assert(addr_format == nir_address_format_32bit_index_offset);
440 return build_desc_addr(b, NULL, desc_type, res_index, addr_format, state);
441 } else if (addr_format == nir_address_format_32bit_index_offset) {
442 nir_def *array_index = nir_channel(b, res_index, 0);
443 nir_def *packed = nir_channel(b, res_index, 1);
444 nir_def *surface_index = nir_extract_u16(b, packed, nir_imm_int(b, 0));
445
446 return nir_vec2(b, nir_iadd(b, surface_index, array_index),
447 nir_imm_int(b, 0));
448 }
449
450 nir_def *desc_addr =
451 build_desc_addr(b, NULL, desc_type, res_index, addr_format, state);
452
453 nir_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 4, 32, state);
454
455 if (state->has_dynamic_buffers) {
456 struct res_index_defs res = unpack_res_index(b, res_index);
457
458 /* This shader has dynamic offsets and we have no way of knowing
459 * (apart from the dynamic offset base index) if this buffer has a
460 * dynamic offset.
461 */
462 nir_def *dyn_offset_idx =
463 nir_iadd(b, res.dyn_offset_base, res.array_index);
464
465 nir_def *dyn_load =
466 nir_load_push_constant(b, 1, 32, nir_imul_imm(b, dyn_offset_idx, 4),
467 .base = offsetof(struct anv_push_constants, dynamic_offsets),
468 .range = MAX_DYNAMIC_BUFFERS * 4);
469
470 nir_def *dynamic_offset =
471 nir_bcsel(b, nir_ieq_imm(b, res.dyn_offset_base, 0xff),
472 nir_imm_int(b, 0), dyn_load);
473
474 /* The dynamic offset gets added to the base pointer so that we
475 * have a sliding window range.
476 */
477 nir_def *base_ptr =
478 nir_pack_64_2x32(b, nir_trim_vector(b, desc, 2));
479 base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
480 desc = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
481 nir_unpack_64_2x32_split_y(b, base_ptr),
482 nir_channel(b, desc, 2),
483 nir_channel(b, desc, 3));
484 }
485
486 /* The last element of the vec4 is always zero.
487 *
488 * See also struct anv_address_range_descriptor
489 */
490 return nir_vec4(b, nir_channel(b, desc, 0),
491 nir_channel(b, desc, 1),
492 nir_channel(b, desc, 2),
493 nir_imm_int(b, 0));
494 }
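/* The vec4 built above follows the same layout as the descriptor data
 * loaded from memory (see struct anv_address_range_descriptor): components
 * 0 and 1 hold the low and high halves of the 64-bit base address,
 * component 2 holds the buffer size (lower_get_ssbo_size() reads it back
 * out of channel 2), and component 3 is forced to zero.  With dynamic
 * buffers, only the base address is shifted; the size stays the same,
 * which is what gives the "sliding window" described above.
 */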
495
496 /** Loads descriptor memory for a variable-based deref chain
497 *
498 * The deref chain has to terminate at a variable with a descriptor_set and
499 * binding set. This is used for images, textures, and samplers.
500 */
501 static nir_def *
502 build_load_var_deref_descriptor_mem(nir_builder *b, nir_deref_instr *deref,
503 unsigned desc_offset,
504 unsigned num_components, unsigned bit_size,
505 struct apply_pipeline_layout_state *state)
506 {
507 nir_variable *var = nir_deref_instr_get_variable(deref);
508
509 const uint32_t set = var->data.descriptor_set;
510 const uint32_t binding = var->data.binding;
511 const struct anv_descriptor_set_binding_layout *bind_layout =
512 &state->layout->set[set].layout->binding[binding];
513
514 nir_def *array_index;
515 if (deref->deref_type != nir_deref_type_var) {
516 assert(deref->deref_type == nir_deref_type_array);
517 assert(nir_deref_instr_parent(deref)->deref_type == nir_deref_type_var);
518 array_index = deref->arr.index.ssa;
519 } else {
520 array_index = nir_imm_int(b, 0);
521 }
522
523 /* It doesn't really matter what address format we choose as everything
524 * will constant-fold nicely. Choose one that uses the actual descriptor
525 * buffer so we don't run into issues with index/offset assumptions.
526 */
527 const nir_address_format addr_format =
528 nir_address_format_64bit_bounded_global;
529
530 nir_def *res_index =
531 build_res_index(b, set, binding, array_index, addr_format, state);
532
533 nir_def *desc_addr =
534 build_desc_addr(b, bind_layout, bind_layout->type,
535 res_index, addr_format, state);
536
537 return build_load_descriptor_mem(b, desc_addr, desc_offset,
538 num_components, bit_size, state);
539 }
540
541 /** A recursive form of build_res_index()
542 *
543 * This recursively walks a resource [re]index chain and builds the resource
544 * index. It places the new code with the resource [re]index operation in the
545 * hopes of better CSE. This means the cursor is not where you left it when
546 * this function returns.
547 */
548 static nir_def *
549 build_res_index_for_chain(nir_builder *b, nir_intrinsic_instr *intrin,
550 nir_address_format addr_format,
551 uint32_t *set, uint32_t *binding,
552 struct apply_pipeline_layout_state *state)
553 {
554 if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) {
555 b->cursor = nir_before_instr(&intrin->instr);
556 *set = nir_intrinsic_desc_set(intrin);
557 *binding = nir_intrinsic_binding(intrin);
558 return build_res_index(b, *set, *binding, intrin->src[0].ssa,
559 addr_format, state);
560 } else {
561 assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex);
562 nir_intrinsic_instr *parent = nir_src_as_intrinsic(intrin->src[0]);
563 nir_def *index =
564 build_res_index_for_chain(b, parent, addr_format,
565 set, binding, state);
566
567 b->cursor = nir_before_instr(&intrin->instr);
568
569 return build_res_reindex(b, index, intrin->src[1].ssa, addr_format);
570 }
571 }
572
573 /** Builds a buffer address for a given vulkan [re]index intrinsic
574 *
575 * The cursor is not where you left it when this function returns.
576 */
577 static nir_def *
578 build_buffer_addr_for_idx_intrin(nir_builder *b,
579 nir_intrinsic_instr *idx_intrin,
580 nir_address_format addr_format,
581 struct apply_pipeline_layout_state *state)
582 {
583 uint32_t set = UINT32_MAX, binding = UINT32_MAX;
584 nir_def *res_index =
585 build_res_index_for_chain(b, idx_intrin, addr_format,
586 &set, &binding, state);
587
588 const struct anv_descriptor_set_binding_layout *bind_layout =
589 &state->layout->set[set].layout->binding[binding];
590
591 return build_buffer_addr_for_res_index(b, bind_layout->type,
592 res_index, addr_format, state);
593 }
594
595 /** Builds a buffer address for deref chain
596 *
597 * This assumes that you can chase the chain all the way back to the original
598 * vulkan_resource_index intrinsic.
599 *
600 * The cursor is not where you left it when this function returns.
601 */
602 static nir_def *
603 build_buffer_addr_for_deref(nir_builder *b, nir_deref_instr *deref,
604 nir_address_format addr_format,
605 struct apply_pipeline_layout_state *state)
606 {
607 nir_deref_instr *parent = nir_deref_instr_parent(deref);
608 if (parent) {
609 nir_def *addr =
610 build_buffer_addr_for_deref(b, parent, addr_format, state);
611
612 b->cursor = nir_before_instr(&deref->instr);
613 return nir_explicit_io_address_from_deref(b, deref, addr, addr_format);
614 }
615
616 nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
617 assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);
618
619 nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);
620
621 b->cursor = nir_before_instr(&deref->instr);
622
623 return build_buffer_addr_for_idx_intrin(b, idx_intrin, addr_format, state);
624 }
625
626 static bool
627 try_lower_direct_buffer_intrinsic(nir_builder *b,
628 nir_intrinsic_instr *intrin,
629 struct apply_pipeline_layout_state *state)
630 {
631 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
632 if (!nir_deref_mode_is_one_of(deref, nir_var_mem_ubo | nir_var_mem_ssbo))
633 return false;
634
635 nir_intrinsic_instr *desc = nir_deref_find_descriptor(deref, state);
636 if (desc == NULL) {
637 /* We should always be able to find the descriptor for UBO access. */
638 assert(nir_deref_mode_is_one_of(deref, nir_var_mem_ssbo));
639 return false;
640 }
641
642 nir_address_format addr_format = descriptor_address_format(desc, state);
643
644 if (nir_deref_mode_is(deref, nir_var_mem_ssbo)) {
645 /* Normal binding table-based messages can't handle non-uniform access
646 * so we have to fall back to A64.
647 */
648 if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM)
649 return false;
650
651 if (!descriptor_has_bti(desc, state))
652 return false;
653
654 /* Rewrite to 32bit_index_offset whenever we can */
655 addr_format = nir_address_format_32bit_index_offset;
656 } else {
657 assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
658
659 /* Rewrite to 32bit_index_offset whenever we can */
660 if (descriptor_has_bti(desc, state))
661 addr_format = nir_address_format_32bit_index_offset;
662 }
663
664 nir_def *addr =
665 build_buffer_addr_for_deref(b, deref, addr_format, state);
666
667 b->cursor = nir_before_instr(&intrin->instr);
668 nir_lower_explicit_io_instr(b, intrin, addr, addr_format);
669
670 return true;
671 }
672
673 static bool
674 lower_load_accel_struct_desc(nir_builder *b,
675 nir_intrinsic_instr *load_desc,
676 struct apply_pipeline_layout_state *state)
677 {
678 assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);
679
680 nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);
681
682 /* It doesn't really matter what address format we choose as
683 * everything will constant-fold nicely. Choose one that uses the
684 * actual descriptor buffer.
685 */
686 const nir_address_format addr_format =
687 nir_address_format_64bit_bounded_global;
688
689 uint32_t set = UINT32_MAX, binding = UINT32_MAX;
690 nir_def *res_index =
691 build_res_index_for_chain(b, idx_intrin, addr_format,
692 &set, &binding, state);
693
694 const struct anv_descriptor_set_binding_layout *bind_layout =
695 &state->layout->set[set].layout->binding[binding];
696
697 b->cursor = nir_before_instr(&load_desc->instr);
698
699 nir_def *desc_addr =
700 build_desc_addr(b, bind_layout, bind_layout->type,
701 res_index, addr_format, state);
702
703 /* Acceleration structure descriptors are always uint64_t */
704 nir_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 1, 64, state);
705
706 assert(load_desc->def.bit_size == 64);
707 assert(load_desc->def.num_components == 1);
708 nir_def_rewrite_uses(&load_desc->def, desc);
709 nir_instr_remove(&load_desc->instr);
710
711 return true;
712 }
713
714 static bool
715 lower_direct_buffer_instr(nir_builder *b, nir_instr *instr, void *_state)
716 {
717 struct apply_pipeline_layout_state *state = _state;
718
719 if (instr->type != nir_instr_type_intrinsic)
720 return false;
721
722 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
723 switch (intrin->intrinsic) {
724 case nir_intrinsic_load_deref:
725 case nir_intrinsic_store_deref:
726 case nir_intrinsic_deref_atomic:
727 case nir_intrinsic_deref_atomic_swap:
728 return try_lower_direct_buffer_intrinsic(b, intrin, state);
729
730 case nir_intrinsic_load_vulkan_descriptor:
731 if (nir_intrinsic_desc_type(intrin) ==
732 VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
733 return lower_load_accel_struct_desc(b, intrin, state);
734 return false;
735
736 default:
737 return false;
738 }
739 }
740
741 static bool
742 lower_res_index_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
743 struct apply_pipeline_layout_state *state)
744 {
745 b->cursor = nir_before_instr(&intrin->instr);
746
747 nir_address_format addr_format =
748 addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);
749
750 nir_def *index =
751 build_res_index(b, nir_intrinsic_desc_set(intrin),
752 nir_intrinsic_binding(intrin),
753 intrin->src[0].ssa,
754 addr_format, state);
755
756 assert(intrin->def.bit_size == index->bit_size);
757 assert(intrin->def.num_components == index->num_components);
758 nir_def_rewrite_uses(&intrin->def, index);
759 nir_instr_remove(&intrin->instr);
760
761 return true;
762 }
763
764 static bool
765 lower_res_reindex_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
766 struct apply_pipeline_layout_state *state)
767 {
768 b->cursor = nir_before_instr(&intrin->instr);
769
770 nir_address_format addr_format =
771 addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);
772
773 nir_def *index =
774 build_res_reindex(b, intrin->src[0].ssa,
775 intrin->src[1].ssa,
776 addr_format);
777
778 assert(intrin->def.bit_size == index->bit_size);
779 assert(intrin->def.num_components == index->num_components);
780 nir_def_rewrite_uses(&intrin->def, index);
781 nir_instr_remove(&intrin->instr);
782
783 return true;
784 }
785
786 static bool
787 lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
788 struct apply_pipeline_layout_state *state)
789 {
790 b->cursor = nir_before_instr(&intrin->instr);
791
792 const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
793 nir_address_format addr_format = addr_format_for_desc_type(desc_type, state);
794
795 nir_def *desc =
796 build_buffer_addr_for_res_index(b, desc_type, intrin->src[0].ssa,
797 addr_format, state);
798
799 assert(intrin->def.bit_size == desc->bit_size);
800 assert(intrin->def.num_components == desc->num_components);
801 nir_def_rewrite_uses(&intrin->def, desc);
802 nir_instr_remove(&intrin->instr);
803
804 return true;
805 }
806
807 static bool
808 lower_get_ssbo_size(nir_builder *b, nir_intrinsic_instr *intrin,
809 struct apply_pipeline_layout_state *state)
810 {
811 if (_mesa_set_search(state->lowered_instrs, intrin))
812 return false;
813
814 b->cursor = nir_before_instr(&intrin->instr);
815
816 nir_address_format addr_format =
817 addr_format_for_desc_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, state);
818
819 nir_def *desc =
820 build_buffer_addr_for_res_index(b, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
821 intrin->src[0].ssa, addr_format, state);
822
823 switch (addr_format) {
824 case nir_address_format_64bit_global_32bit_offset:
825 case nir_address_format_64bit_bounded_global: {
826 nir_def *size = nir_channel(b, desc, 2);
827 nir_def_rewrite_uses(&intrin->def, size);
828 nir_instr_remove(&intrin->instr);
829 break;
830 }
831
832 case nir_address_format_32bit_index_offset:
833 /* The binding table index is the first component of the address. The
834 * back-end wants a scalar binding table index source.
835 */
836 nir_src_rewrite(&intrin->src[0], nir_channel(b, desc, 0));
837 break;
838
839 default:
840 unreachable("Unsupported address format");
841 }
842
843 return true;
844 }
845
846 static bool
847 image_binding_needs_lowered_surface(nir_variable *var)
848 {
849 return !(var->data.access & ACCESS_NON_READABLE) &&
850 var->data.image.format != PIPE_FORMAT_NONE;
851 }
852
853 static bool
854 lower_image_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
855 struct apply_pipeline_layout_state *state)
856 {
857 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
858 nir_variable *var = nir_deref_instr_get_variable(deref);
859
860 unsigned set = var->data.descriptor_set;
861 unsigned binding = var->data.binding;
862 unsigned binding_offset = state->set[set].surface_offsets[binding];
863
864 b->cursor = nir_before_instr(&intrin->instr);
865
866 if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
867 b->cursor = nir_instr_remove(&intrin->instr);
868
869 const unsigned param = nir_intrinsic_base(intrin);
870
871 nir_def *desc =
872 build_load_var_deref_descriptor_mem(b, deref, param * 16,
873 intrin->def.num_components,
874 intrin->def.bit_size, state);
875
876 nir_def_rewrite_uses(&intrin->def, desc);
877 } else {
878 nir_def *index = NULL;
879 if (deref->deref_type != nir_deref_type_var) {
880 assert(deref->deref_type == nir_deref_type_array);
881 index = deref->arr.index.ssa;
882 } else {
883 index = nir_imm_int(b, 0);
884 }
885
886 index = nir_iadd_imm(b, index, binding_offset);
887 nir_rewrite_image_intrinsic(intrin, index, false);
888 }
889
890 return true;
891 }
892
893 static bool
894 lower_load_constant(nir_builder *b, nir_intrinsic_instr *intrin,
895 struct apply_pipeline_layout_state *state)
896 {
897 b->cursor = nir_instr_remove(&intrin->instr);
898
899 /* Any constant-offset load_constant instructions should have been removed
900 * by constant folding.
901 */
902 assert(!nir_src_is_const(intrin->src[0]));
903 nir_def *offset = nir_iadd_imm(b, intrin->src[0].ssa,
904 nir_intrinsic_base(intrin));
905
906 nir_def *data;
907 if (!anv_use_relocations(state->pdevice)) {
908 unsigned load_size = intrin->def.num_components *
909 intrin->def.bit_size / 8;
910 unsigned load_align = intrin->def.bit_size / 8;
911
912 assert(load_size < b->shader->constant_data_size);
913 unsigned max_offset = b->shader->constant_data_size - load_size;
914 offset = nir_umin(b, offset, nir_imm_int(b, max_offset));
915
916 nir_def *const_data_base_addr = nir_pack_64_2x32_split(b,
917 nir_load_reloc_const_intel(b, ELK_SHADER_RELOC_CONST_DATA_ADDR_LOW),
918 nir_load_reloc_const_intel(b, ELK_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
919
920 data = nir_load_global_constant(b, nir_iadd(b, const_data_base_addr,
921 nir_u2u64(b, offset)),
922 load_align,
923 intrin->def.num_components,
924 intrin->def.bit_size);
925 } else {
926 nir_def *index = nir_imm_int(b, state->constants_offset);
927
928 data = nir_load_ubo(b, intrin->num_components, intrin->def.bit_size,
929 index, offset,
930 .align_mul = intrin->def.bit_size / 8,
931 .align_offset = 0,
932 .range_base = nir_intrinsic_base(intrin),
933 .range = nir_intrinsic_range(intrin));
934 }
935
936 nir_def_rewrite_uses(&intrin->def, data);
937
938 return true;
939 }
940
941 static bool
942 lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
943 struct apply_pipeline_layout_state *state)
944 {
945 b->cursor = nir_instr_remove(&intrin->instr);
946
947 nir_def *base_workgroup_id =
948 nir_load_push_constant(b, 3, 32, nir_imm_int(b, 0),
949 .base = offsetof(struct anv_push_constants, cs.base_work_group_id),
950 .range = 3 * sizeof(uint32_t));
951 nir_def_rewrite_uses(&intrin->def, base_workgroup_id);
952
953 return true;
954 }
955
956 static void
957 lower_tex_deref(nir_builder *b, nir_tex_instr *tex,
958 nir_tex_src_type deref_src_type,
959 unsigned *base_index, unsigned plane,
960 struct apply_pipeline_layout_state *state)
961 {
962 int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
963 if (deref_src_idx < 0)
964 return;
965
966 nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
967 nir_variable *var = nir_deref_instr_get_variable(deref);
968
969 unsigned set = var->data.descriptor_set;
970 unsigned binding = var->data.binding;
971 unsigned array_size =
972 state->layout->set[set].layout->binding[binding].array_size;
973
974 unsigned binding_offset;
975 if (deref_src_type == nir_tex_src_texture_deref) {
976 binding_offset = state->set[set].surface_offsets[binding];
977 } else {
978 assert(deref_src_type == nir_tex_src_sampler_deref);
979 binding_offset = state->set[set].sampler_offsets[binding];
980 }
981
982 nir_tex_src_type offset_src_type;
983 nir_def *index = NULL;
984 if (binding_offset > MAX_BINDING_TABLE_SIZE) {
985 const unsigned plane_offset =
986 plane * sizeof(struct anv_sampled_image_descriptor);
987
988 nir_def *desc =
989 build_load_var_deref_descriptor_mem(b, deref, plane_offset,
990 2, 32, state);
991
992 if (deref_src_type == nir_tex_src_texture_deref) {
993 offset_src_type = nir_tex_src_texture_handle;
994 index = nir_channel(b, desc, 0);
995 } else {
996 assert(deref_src_type == nir_tex_src_sampler_deref);
997 offset_src_type = nir_tex_src_sampler_handle;
998 index = nir_channel(b, desc, 1);
999 }
1000 } else {
1001 if (deref_src_type == nir_tex_src_texture_deref) {
1002 offset_src_type = nir_tex_src_texture_offset;
1003 } else {
1004 assert(deref_src_type == nir_tex_src_sampler_deref);
1005 offset_src_type = nir_tex_src_sampler_offset;
1006 }
1007
1008 *base_index = binding_offset + plane;
1009
1010 if (deref->deref_type != nir_deref_type_var) {
1011 assert(deref->deref_type == nir_deref_type_array);
1012
1013 if (nir_src_is_const(deref->arr.index)) {
1014 unsigned arr_index = MIN2(nir_src_as_uint(deref->arr.index), array_size - 1);
1015 struct anv_sampler **immutable_samplers =
1016 state->layout->set[set].layout->binding[binding].immutable_samplers;
1017 if (immutable_samplers) {
1018 /* Arrays of YCbCr samplers are tightly packed in the binding
1019 * tables, so compute the offset of an element in the array by
1020 * adding the number of planes of all preceding elements.
1021 */
1022 unsigned desc_arr_index = 0;
1023 for (int i = 0; i < arr_index; i++)
1024 desc_arr_index += immutable_samplers[i]->n_planes;
1025 *base_index += desc_arr_index;
1026 } else {
1027 *base_index += arr_index;
1028 }
1029 } else {
1030 /* From VK_KHR_sampler_ycbcr_conversion:
1031 *
1032 * If sampler Y’CBCR conversion is enabled, the combined image
1033 * sampler must be indexed only by constant integral expressions
1034 * when aggregated into arrays in shader code, irrespective of
1035 * the shaderSampledImageArrayDynamicIndexing feature.
1036 */
1037 assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1);
1038
1039 index = deref->arr.index.ssa;
1040 }
1041 }
1042 }
1043
1044 if (index) {
1045 nir_src_rewrite(&tex->src[deref_src_idx].src, index);
1046 tex->src[deref_src_idx].src_type = offset_src_type;
1047 } else {
1048 nir_tex_instr_remove_src(tex, deref_src_idx);
1049 }
1050 }
1051
1052 static uint32_t
1053 tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
1054 {
1055 int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
1056 if (plane_src_idx < 0)
1057 return 0;
1058
1059 unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src);
1060
1061 nir_tex_instr_remove_src(tex, plane_src_idx);
1062
1063 return plane;
1064 }
1065
1066 static nir_def *
1067 build_def_array_select(nir_builder *b, nir_def **srcs, nir_def *idx,
1068 unsigned start, unsigned end)
1069 {
1070 if (start == end - 1) {
1071 return srcs[start];
1072 } else {
1073 unsigned mid = start + (end - start) / 2;
1074 return nir_bcsel(b, nir_ilt_imm(b, idx, mid),
1075 build_def_array_select(b, srcs, idx, start, mid),
1076 build_def_array_select(b, srcs, idx, mid, end));
1077 }
1078 }
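/* This emits a balanced tree of bcsel instructions rather than a linear
 * chain: picking one of the 8 swizzle sources used below takes 7 bcsels
 * arranged in 3 levels, roughly
 *
 *    idx < 4 ? (idx < 2 ? s0/s1 : s2/s3) : (idx < 6 ? s4/s5 : s6/s7)
 */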
1079
1080 static void
1081 lower_gfx7_tex_swizzle(nir_builder *b, nir_tex_instr *tex, unsigned plane,
1082 struct apply_pipeline_layout_state *state)
1083 {
1084 assert(state->pdevice->info.verx10 == 70);
1085 if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ||
1086 nir_tex_instr_is_query(tex) ||
1087 tex->op == nir_texop_tg4 || /* We can't swizzle TG4 */
1088 (tex->is_shadow && tex->is_new_style_shadow))
1089 return;
1090
1091 int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
1092 assert(deref_src_idx >= 0);
1093
1094 nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
1095 nir_variable *var = nir_deref_instr_get_variable(deref);
1096
1097 unsigned set = var->data.descriptor_set;
1098 unsigned binding = var->data.binding;
1099 const struct anv_descriptor_set_binding_layout *bind_layout =
1100 &state->layout->set[set].layout->binding[binding];
1101
1102 if ((bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) == 0)
1103 return;
1104
1105 b->cursor = nir_before_instr(&tex->instr);
1106
1107 const unsigned plane_offset =
1108 plane * sizeof(struct anv_texture_swizzle_descriptor);
1109 nir_def *swiz =
1110 build_load_var_deref_descriptor_mem(b, deref, plane_offset,
1111 1, 32, state);
1112
1113 b->cursor = nir_after_instr(&tex->instr);
1114
1115 assert(tex->def.bit_size == 32);
1116 assert(tex->def.num_components == 4);
1117
1118 /* Initializing to undef is ok; nir_opt_undef will clean it up. */
1119 nir_def *undef = nir_undef(b, 1, 32);
1120 nir_def *comps[8];
1121 for (unsigned i = 0; i < ARRAY_SIZE(comps); i++)
1122 comps[i] = undef;
1123
1124 comps[ISL_CHANNEL_SELECT_ZERO] = nir_imm_int(b, 0);
1125 if (nir_alu_type_get_base_type(tex->dest_type) == nir_type_float)
1126 comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_float(b, 1);
1127 else
1128 comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_int(b, 1);
1129 comps[ISL_CHANNEL_SELECT_RED] = nir_channel(b, &tex->def, 0);
1130 comps[ISL_CHANNEL_SELECT_GREEN] = nir_channel(b, &tex->def, 1);
1131 comps[ISL_CHANNEL_SELECT_BLUE] = nir_channel(b, &tex->def, 2);
1132 comps[ISL_CHANNEL_SELECT_ALPHA] = nir_channel(b, &tex->def, 3);
1133
1134 nir_def *swiz_comps[4];
1135 for (unsigned i = 0; i < 4; i++) {
1136 nir_def *comp_swiz = nir_extract_u8(b, swiz, nir_imm_int(b, i));
1137 swiz_comps[i] = build_def_array_select(b, comps, comp_swiz, 0, 8);
1138 }
1139 nir_def *swiz_tex_res = nir_vec(b, swiz_comps, 4);
1140
1141 /* Rewrite uses before we insert so we don't rewrite this use */
1142 nir_def_rewrite_uses_after(&tex->def,
1143 swiz_tex_res,
1144 swiz_tex_res->parent_instr);
1145 }
1146
1147 static bool
1148 lower_tex(nir_builder *b, nir_tex_instr *tex,
1149 struct apply_pipeline_layout_state *state)
1150 {
1151 unsigned plane = tex_instr_get_and_remove_plane_src(tex);
1152
1153 /* On Ivy Bridge and Bay Trail, we have to swizzle in the shader. Do this
1154 * before we lower the derefs away so we can still find the descriptor.
1155 */
1156 if (state->pdevice->info.verx10 == 70)
1157 lower_gfx7_tex_swizzle(b, tex, plane, state);
1158
1159 b->cursor = nir_before_instr(&tex->instr);
1160
1161 lower_tex_deref(b, tex, nir_tex_src_texture_deref,
1162 &tex->texture_index, plane, state);
1163
1164 lower_tex_deref(b, tex, nir_tex_src_sampler_deref,
1165 &tex->sampler_index, plane, state);
1166
1167 return true;
1168 }
1169
1170 static bool
1171 apply_pipeline_layout(nir_builder *b, nir_instr *instr, void *_state)
1172 {
1173 struct apply_pipeline_layout_state *state = _state;
1174
1175 switch (instr->type) {
1176 case nir_instr_type_intrinsic: {
1177 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1178 switch (intrin->intrinsic) {
1179 case nir_intrinsic_vulkan_resource_index:
1180 return lower_res_index_intrinsic(b, intrin, state);
1181 case nir_intrinsic_vulkan_resource_reindex:
1182 return lower_res_reindex_intrinsic(b, intrin, state);
1183 case nir_intrinsic_load_vulkan_descriptor:
1184 return lower_load_vulkan_descriptor(b, intrin, state);
1185 case nir_intrinsic_get_ssbo_size:
1186 return lower_get_ssbo_size(b, intrin, state);
1187 case nir_intrinsic_image_deref_load:
1188 case nir_intrinsic_image_deref_store:
1189 case nir_intrinsic_image_deref_atomic:
1190 case nir_intrinsic_image_deref_atomic_swap:
1191 case nir_intrinsic_image_deref_size:
1192 case nir_intrinsic_image_deref_samples:
1193 case nir_intrinsic_image_deref_load_param_intel:
1194 case nir_intrinsic_image_deref_load_raw_intel:
1195 case nir_intrinsic_image_deref_store_raw_intel:
1196 return lower_image_intrinsic(b, intrin, state);
1197 case nir_intrinsic_load_constant:
1198 return lower_load_constant(b, intrin, state);
1199 case nir_intrinsic_load_base_workgroup_id:
1200 return lower_base_workgroup_id(b, intrin, state);
1201 default:
1202 return false;
1203 }
1204 break;
1205 }
1206 case nir_instr_type_tex:
1207 return lower_tex(b, nir_instr_as_tex(instr), state);
1208 default:
1209 return false;
1210 }
1211 }
1212
1213 struct binding_info {
1214 uint32_t binding;
1215 uint8_t set;
1216 uint16_t score;
1217 };
1218
1219 static int
1220 compare_binding_infos(const void *_a, const void *_b)
1221 {
1222 const struct binding_info *a = _a, *b = _b;
1223 if (a->score != b->score)
1224 return b->score - a->score;
1225
1226 if (a->set != b->set)
1227 return a->set - b->set;
1228
1229 return a->binding - b->binding;
1230 }
1231
1232 void
1233 anv_nir_apply_pipeline_layout(nir_shader *shader,
1234 const struct anv_physical_device *pdevice,
1235 enum elk_robustness_flags robust_flags,
1236 const struct anv_pipeline_layout *layout,
1237 struct anv_pipeline_bind_map *map)
1238 {
1239 void *mem_ctx = ralloc_context(NULL);
1240
1241 struct apply_pipeline_layout_state state = {
1242 .pdevice = pdevice,
1243 .layout = layout,
1244 .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_flags),
1245 .ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_flags),
1246 .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
1247 };
1248
1249 for (unsigned s = 0; s < layout->num_sets; s++) {
1250 const unsigned count = layout->set[s].layout->binding_count;
1251 state.set[s].use_count = rzalloc_array(mem_ctx, uint8_t, count);
1252 state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
1253 state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
1254 }
1255
1256 nir_shader_instructions_pass(shader, get_used_bindings,
1257 nir_metadata_all, &state);
1258
1259 for (unsigned s = 0; s < layout->num_sets; s++) {
1260 if (state.set[s].desc_buffer_used) {
1261 map->surface_to_descriptor[map->surface_count] =
1262 (struct anv_pipeline_binding) {
1263 .set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
1264 .index = s,
1265 };
1266 state.set[s].desc_offset = map->surface_count;
1267 map->surface_count++;
1268 }
1269 }
1270
1271 if (state.uses_constants && anv_use_relocations(pdevice)) {
1272 state.constants_offset = map->surface_count;
1273 map->surface_to_descriptor[map->surface_count].set =
1274 ANV_DESCRIPTOR_SET_SHADER_CONSTANTS;
1275 map->surface_count++;
1276 }
1277
1278 unsigned used_binding_count = 0;
1279 for (uint32_t set = 0; set < layout->num_sets; set++) {
1280 struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
1281 for (unsigned b = 0; b < set_layout->binding_count; b++) {
1282 if (state.set[set].use_count[b] == 0)
1283 continue;
1284
1285 used_binding_count++;
1286 }
1287 }
1288
1289 struct binding_info *infos =
1290 rzalloc_array(mem_ctx, struct binding_info, used_binding_count);
1291 used_binding_count = 0;
1292 for (uint32_t set = 0; set < layout->num_sets; set++) {
1293 const struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
1294 for (unsigned b = 0; b < set_layout->binding_count; b++) {
1295 if (state.set[set].use_count[b] == 0)
1296 continue;
1297
1298 const struct anv_descriptor_set_binding_layout *binding =
1299 &layout->set[set].layout->binding[b];
1300
1301 /* Do a fixed-point calculation to generate a score based on the
1302 * number of uses and the binding array size. We shift by 7 instead
1303 * of 8 because we're going to use the top bit below to make
1304 * everything which does not support bindless a strictly higher priority
1305 * than things which do.
1306 */
1307 uint16_t score = ((uint16_t)state.set[set].use_count[b] << 7) /
1308 binding->array_size;
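/* For example (numbers purely illustrative): a binding used 4 times with
 * array_size 2 scores (4 << 7) / 2 = 256, while a binding used once with
 * array_size 16 scores only 8, so heavily used, small bindings win
 * binding table slots first.
 */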
1309
1310 /* If the descriptor type doesn't support bindless then put it at the
1311 * beginning so we guarantee it gets a slot.
1312 */
1313 if (!anv_descriptor_supports_bindless(pdevice, binding, true) ||
1314 !anv_descriptor_supports_bindless(pdevice, binding, false))
1315 score |= 1 << 15;
1316
1317 infos[used_binding_count++] = (struct binding_info) {
1318 .set = set,
1319 .binding = b,
1320 .score = score,
1321 };
1322 }
1323 }
1324
1325 /* Order the binding infos based on score with highest scores first. If
1326 * scores are equal we then order by set and binding.
1327 */
1328 qsort(infos, used_binding_count, sizeof(struct binding_info),
1329 compare_binding_infos);
1330
1331 for (unsigned i = 0; i < used_binding_count; i++) {
1332 unsigned set = infos[i].set, b = infos[i].binding;
1333 const struct anv_descriptor_set_binding_layout *binding =
1334 &layout->set[set].layout->binding[b];
1335
1336 const uint32_t array_size = binding->array_size;
1337
1338 if (binding->dynamic_offset_index >= 0)
1339 state.has_dynamic_buffers = true;
1340
1341 if (binding->data & ANV_DESCRIPTOR_SURFACE_STATE) {
1342 assert(map->surface_count + array_size <= MAX_BINDING_TABLE_SIZE);
1343 assert(!anv_descriptor_requires_bindless(pdevice, binding, false));
1344 state.set[set].surface_offsets[b] = map->surface_count;
1345 if (binding->dynamic_offset_index < 0) {
1346 struct anv_sampler **samplers = binding->immutable_samplers;
1347 for (unsigned i = 0; i < binding->array_size; i++) {
1348 uint8_t planes = samplers ? samplers[i]->n_planes : 1;
1349 for (uint8_t p = 0; p < planes; p++) {
1350 map->surface_to_descriptor[map->surface_count++] =
1351 (struct anv_pipeline_binding) {
1352 .set = set,
1353 .index = binding->descriptor_index + i,
1354 .plane = p,
1355 };
1356 }
1357 }
1358 } else {
1359 for (unsigned i = 0; i < binding->array_size; i++) {
1360 map->surface_to_descriptor[map->surface_count++] =
1361 (struct anv_pipeline_binding) {
1362 .set = set,
1363 .index = binding->descriptor_index + i,
1364 .dynamic_offset_index =
1365 layout->set[set].dynamic_offset_start +
1366 binding->dynamic_offset_index + i,
1367 };
1368 }
1369 }
1370 assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
1371 }
1372
1373 if (binding->data & ANV_DESCRIPTOR_SAMPLER_STATE) {
1374 if (map->sampler_count + array_size > MAX_SAMPLER_TABLE_SIZE ||
1375 anv_descriptor_requires_bindless(pdevice, binding, true)) {
1376 /* If this descriptor doesn't fit in the binding table or if it
1377 * requires bindless for some reason, flag it as bindless.
1378 *
1379 * We also make large sampler arrays bindless because we can avoid
1380 * using indirect sends thanks to bindless samplers being packed
1381 * less tightly than the sampler table.
1382 */
1383 assert(anv_descriptor_supports_bindless(pdevice, binding, true));
1384 state.set[set].sampler_offsets[b] = BINDLESS_OFFSET;
1385 } else {
1386 state.set[set].sampler_offsets[b] = map->sampler_count;
1387 struct anv_sampler **samplers = binding->immutable_samplers;
1388 for (unsigned i = 0; i < binding->array_size; i++) {
1389 uint8_t planes = samplers ? samplers[i]->n_planes : 1;
1390 for (uint8_t p = 0; p < planes; p++) {
1391 map->sampler_to_descriptor[map->sampler_count++] =
1392 (struct anv_pipeline_binding) {
1393 .set = set,
1394 .index = binding->descriptor_index + i,
1395 .plane = p,
1396 };
1397 }
1398 }
1399 }
1400 }
1401 }
1402
1403 nir_foreach_image_variable(var, shader) {
1404 const uint32_t set = var->data.descriptor_set;
1405 const uint32_t binding = var->data.binding;
1406 const struct anv_descriptor_set_binding_layout *bind_layout =
1407 &layout->set[set].layout->binding[binding];
1408 const uint32_t array_size = bind_layout->array_size;
1409
1410 if (state.set[set].use_count[binding] == 0)
1411 continue;
1412
1413 if (state.set[set].surface_offsets[binding] >= MAX_BINDING_TABLE_SIZE)
1414 continue;
1415
1416 struct anv_pipeline_binding *pipe_binding =
1417 &map->surface_to_descriptor[state.set[set].surface_offsets[binding]];
1418 for (unsigned i = 0; i < array_size; i++) {
1419 assert(pipe_binding[i].set == set);
1420 assert(pipe_binding[i].index == bind_layout->descriptor_index + i);
1421
1422 pipe_binding[i].lowered_storage_surface =
1423 image_binding_needs_lowered_surface(var);
1424 }
1425 }
1426
1427 /* Before we do the normal lowering, we look for any SSBO operations
1428 * that we can lower to the BTI model and lower them up-front. The BTI
1429 * model can perform better than the A64 model for a couple reasons:
1430 *
1431 * 1. 48-bit address calculations are potentially expensive and using
1432 * the BTI model lets us simply compute 32-bit offsets and the
1433 * hardware adds the 64-bit surface base address.
1434 *
1435 * 2. The BTI messages, because they use surface states, do bounds
1436 * checking for us. With the A64 model, we have to do our own
1437 * bounds checking and this means wider pointers and extra
1438 * calculations and branching in the shader.
1439 *
1440 * The solution to both of these is to convert things to the BTI model
1441 * opportunistically. We need to do this as a pre-pass for two
1442 * reasons:
1443 *
1444 * 1. The BTI model requires nir_address_format_32bit_index_offset
1445 * pointers which are not the same type as the pointers needed for
1446 * the A64 model. Because all our derefs are set up for the A64
1447 * model (in case we have variable pointers), we have to crawl all
1448 * the way back to the vulkan_resource_index intrinsic and build a
1449 * completely fresh index+offset calculation.
1450 *
1451 * 2. Because the variable-pointers-capable lowering that we do as part
1452 * of apply_pipeline_layout_block is destructive (It really has to
1453 * be to handle variable pointers properly), we've lost the deref
1454 * information by the time we get to the load/store/atomic
1455 * intrinsics in that pass.
1456 */
1457 nir_shader_instructions_pass(shader, lower_direct_buffer_instr,
1458 nir_metadata_block_index |
1459 nir_metadata_dominance,
1460 &state);
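/* Informally (a hand-written sketch, not actual NIR output), the pass above
 * turns
 *
 *    %desc = load_vulkan_descriptor(%res_index)
 *    %val  = load_deref(%deref)       (deref chain rooted at %desc)
 *
 * into a direct
 *
 *    %val = load_ssbo(%binding_table_index, %offset32)
 *
 * whenever the descriptor has a usable binding table entry and the access
 * is uniform; everything else falls through to the A64 lowering done by
 * apply_pipeline_layout below.
 */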
1461
1462 /* We just got rid of all the direct access. Delete it so it's not in the
1463 * way when we do our indirect lowering.
1464 */
1465 nir_opt_dce(shader);
1466
1467 nir_shader_instructions_pass(shader, apply_pipeline_layout,
1468 nir_metadata_block_index |
1469 nir_metadata_dominance,
1470 &state);
1471
1472 ralloc_free(mem_ctx);
1473
1474 /* Now that we're done computing the surface and sampler portions of the
1475 * bind map, hash them. This lets us quickly determine if the actual
1476 * mapping has changed and not just a no-op pipeline change.
1477 */
1478 _mesa_sha1_compute(map->surface_to_descriptor,
1479 map->surface_count * sizeof(struct anv_pipeline_binding),
1480 map->surface_sha1);
1481 _mesa_sha1_compute(map->sampler_to_descriptor,
1482 map->sampler_count * sizeof(struct anv_pipeline_binding),
1483 map->sampler_sha1);
1484 }
1485