1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "anv_nir.h"
25 #include "nir/nir_builder.h"
26 #include "compiler/elk/elk_nir.h"
27 #include "util/mesa-sha1.h"
28 #include "util/set.h"
29 
30 /* Sampler tables don't actually have a maximum size but we pick one just so
31  * that we don't end up emitting too much state on-the-fly.
32  */
33 #define MAX_SAMPLER_TABLE_SIZE 128
34 #define BINDLESS_OFFSET        255
35 
36 #define sizeof_field(type, field) sizeof(((type *)0)->field)
37 
38 struct apply_pipeline_layout_state {
39    const struct anv_physical_device *pdevice;
40 
41    const struct anv_pipeline_layout *layout;
42    nir_address_format ssbo_addr_format;
43    nir_address_format ubo_addr_format;
44 
45    /* Place to flag lowered instructions so we don't lower them twice */
46    struct set *lowered_instrs;
47 
48    bool uses_constants;
49    bool has_dynamic_buffers;
50    uint8_t constants_offset;
51    struct {
52       bool desc_buffer_used;
53       uint8_t desc_offset;
54 
55       uint8_t *use_count;
56       uint8_t *surface_offsets;
57       uint8_t *sampler_offsets;
58    } set[MAX_SETS];
59 };
60 
61 static nir_address_format
62 addr_format_for_desc_type(VkDescriptorType desc_type,
63                           struct apply_pipeline_layout_state *state)
64 {
65    switch (desc_type) {
66    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
67    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
68       return state->ssbo_addr_format;
69 
70    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
71    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
72       return state->ubo_addr_format;
73 
74    case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
75       return nir_address_format_32bit_index_offset;
76 
77    default:
78       unreachable("Unsupported descriptor type");
79    }
80 }
81 
82 static void
83 add_binding(struct apply_pipeline_layout_state *state,
84             uint32_t set, uint32_t binding)
85 {
86    const struct anv_descriptor_set_binding_layout *bind_layout =
87       &state->layout->set[set].layout->binding[binding];
88 
89    if (state->set[set].use_count[binding] < UINT8_MAX)
90       state->set[set].use_count[binding]++;
91 
92    /* Only flag the descriptor buffer as used if there's actually data for
93     * this binding.  This lets us be lazy and call this function constantly
94     * without worrying about unnecessarily enabling the buffer.
95     */
96    if (bind_layout->descriptor_stride)
97       state->set[set].desc_buffer_used = true;
98 }
99 
100 static void
101 add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src)
102 {
103    nir_deref_instr *deref = nir_src_as_deref(src);
104    nir_variable *var = nir_deref_instr_get_variable(deref);
105    add_binding(state, var->data.descriptor_set, var->data.binding);
106 }
107 
108 static void
109 add_tex_src_binding(struct apply_pipeline_layout_state *state,
110                     nir_tex_instr *tex, nir_tex_src_type deref_src_type)
111 {
112    int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
113    if (deref_src_idx < 0)
114       return;
115 
116    add_deref_src_binding(state, tex->src[deref_src_idx].src);
117 }
118 
119 static bool
120 get_used_bindings(UNUSED nir_builder *_b, nir_instr *instr, void *_state)
121 {
122    struct apply_pipeline_layout_state *state = _state;
123 
124    switch (instr->type) {
125    case nir_instr_type_intrinsic: {
126       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
127       switch (intrin->intrinsic) {
128       case nir_intrinsic_vulkan_resource_index:
129          add_binding(state, nir_intrinsic_desc_set(intrin),
130                      nir_intrinsic_binding(intrin));
131          break;
132 
133       case nir_intrinsic_image_deref_load:
134       case nir_intrinsic_image_deref_store:
135       case nir_intrinsic_image_deref_atomic:
136       case nir_intrinsic_image_deref_atomic_swap:
137       case nir_intrinsic_image_deref_size:
138       case nir_intrinsic_image_deref_samples:
139       case nir_intrinsic_image_deref_load_param_intel:
140       case nir_intrinsic_image_deref_load_raw_intel:
141       case nir_intrinsic_image_deref_store_raw_intel:
142          add_deref_src_binding(state, intrin->src[0]);
143          break;
144 
145       case nir_intrinsic_load_constant:
146          state->uses_constants = true;
147          break;
148 
149       default:
150          break;
151       }
152       break;
153    }
154    case nir_instr_type_tex: {
155       nir_tex_instr *tex = nir_instr_as_tex(instr);
156       add_tex_src_binding(state, tex, nir_tex_src_texture_deref);
157       add_tex_src_binding(state, tex, nir_tex_src_sampler_deref);
158       break;
159    }
160    default:
161       break;
162    }
163 
164    return false;
165 }
166 
167 static nir_intrinsic_instr *
168 find_descriptor_for_index_src(nir_src src,
169                               struct apply_pipeline_layout_state *state)
170 {
171    nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);
172 
173    while (intrin && intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex)
174       intrin = nir_src_as_intrinsic(intrin->src[0]);
175 
176    if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
177       return NULL;
178 
179    return intrin;
180 }
181 
182 static bool
183 descriptor_has_bti(nir_intrinsic_instr *intrin,
184                    struct apply_pipeline_layout_state *state)
185 {
186    assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
187 
188    uint32_t set = nir_intrinsic_desc_set(intrin);
189    uint32_t binding = nir_intrinsic_binding(intrin);
190    const struct anv_descriptor_set_binding_layout *bind_layout =
191       &state->layout->set[set].layout->binding[binding];
192 
193    uint32_t surface_index;
194    if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM)
195       surface_index = state->set[set].desc_offset;
196    else
197       surface_index = state->set[set].surface_offsets[binding];
198 
199    /* Only lower to a BTI message if we have a valid binding table index. */
200    return surface_index < MAX_BINDING_TABLE_SIZE;
201 }
202 
203 static nir_address_format
204 descriptor_address_format(nir_intrinsic_instr *intrin,
205                           struct apply_pipeline_layout_state *state)
206 {
207    assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
208 
209    return addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);
210 }
211 
212 static nir_intrinsic_instr *
213 nir_deref_find_descriptor(nir_deref_instr *deref,
214                           struct apply_pipeline_layout_state *state)
215 {
216    while (1) {
217       /* Nothing we will use this on has a variable */
218       assert(deref->deref_type != nir_deref_type_var);
219 
220       nir_deref_instr *parent = nir_src_as_deref(deref->parent);
221       if (!parent)
222          break;
223 
224       deref = parent;
225    }
226    assert(deref->deref_type == nir_deref_type_cast);
227 
228    nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent);
229    if (!intrin || intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
230       return NULL;
231 
232    return find_descriptor_for_index_src(intrin->src[0], state);
233 }
234 
235 static nir_def *
236 build_load_descriptor_mem(nir_builder *b,
237                           nir_def *desc_addr, unsigned desc_offset,
238                           unsigned num_components, unsigned bit_size,
239                           struct apply_pipeline_layout_state *state)
240 
241 {
242    nir_def *surface_index = nir_channel(b, desc_addr, 0);
243    nir_def *offset32 =
244       nir_iadd_imm(b, nir_channel(b, desc_addr, 1), desc_offset);
245 
246    return nir_load_ubo(b, num_components, bit_size,
247                        surface_index, offset32,
248                        .align_mul = 8,
249                        .align_offset = desc_offset % 8,
250                        .range_base = 0,
251                        .range = ~0);
252 }
253 
254 /** Build a Vulkan resource index
255  *
256  * A "resource index" is the term used by our SPIR-V parser and the relevant
257  * NIR intrinsics for a reference into a descriptor set.  It acts much like a
258  * deref in NIR except that it accesses opaque descriptors instead of memory.
259  *
260  * Coming out of SPIR-V, both the resource indices (in the form of
261  * vulkan_resource_[re]index intrinsics) and the memory derefs (in the form
262  * of nir_deref_instr) use the same vector component/bit size.  The meaning
263  * of those values for memory derefs (nir_deref_instr) is given by the
264  * nir_address_format associated with the descriptor type.  For resource
265  * indices, it's an encoding entirely internal to ANV which describes, in some
266  * sense, the address of the descriptor.  Thanks to the NIR/SPIR-V rules, it
267  * must be packed into the same size SSA values as a memory address.  For this
268  * reason, the actual encoding may depend both on the address format for
269  * memory derefs and the descriptor address format.
270  *
271  * The load_vulkan_descriptor intrinsic exists to provide a transition point
272  * between these two forms of derefs: descriptor and memory.
273  */
274 static nir_def *
275 build_res_index(nir_builder *b, uint32_t set, uint32_t binding,
276                 nir_def *array_index, nir_address_format addr_format,
277                 struct apply_pipeline_layout_state *state)
278 {
279    const struct anv_descriptor_set_binding_layout *bind_layout =
280       &state->layout->set[set].layout->binding[binding];
281 
282    uint32_t array_size = bind_layout->array_size;
283 
284    switch (addr_format) {
285    case nir_address_format_64bit_global_32bit_offset:
286    case nir_address_format_64bit_bounded_global: {
287       assert(state->set[set].desc_offset < MAX_BINDING_TABLE_SIZE);
288       uint32_t set_idx = state->set[set].desc_offset;
289 
290       assert(bind_layout->dynamic_offset_index < MAX_DYNAMIC_BUFFERS);
291       uint32_t dynamic_offset_index = 0xff; /* No dynamic offset */
292       if (bind_layout->dynamic_offset_index >= 0) {
293          dynamic_offset_index =
294             state->layout->set[set].dynamic_offset_start +
295             bind_layout->dynamic_offset_index;
296       }
297 
298       const uint32_t packed = (bind_layout->descriptor_stride << 16) | (set_idx << 8) | dynamic_offset_index;
299 
300       return nir_vec4(b, nir_imm_int(b, packed),
301                          nir_imm_int(b, bind_layout->descriptor_offset),
302                          nir_imm_int(b, array_size - 1),
303                          array_index);
304    }
305 
306    case nir_address_format_32bit_index_offset: {
307       if (bind_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
308          uint32_t surface_index = state->set[set].desc_offset;
309          return nir_imm_ivec2(b, surface_index,
310                                  bind_layout->descriptor_offset);
311       } else {
312          uint32_t surface_index = state->set[set].surface_offsets[binding];
313          assert(array_size > 0 && array_size <= UINT16_MAX);
314          assert(surface_index <= UINT16_MAX);
315          uint32_t packed = ((array_size - 1) << 16) | surface_index;
316          return nir_vec2(b, array_index, nir_imm_int(b, packed));
317       }
318    }
319 
320    default:
321       unreachable("Unsupported address format");
322    }
323 }
324 
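/* Illustrative sketch (not from the driver; all values hypothetical): for a
 * binding whose descriptor_stride is 32, whose descriptor buffer sits in
 * binding-table slot 3, with no dynamic offset, descriptor_offset 64 and
 * array_size 8, the vec4 built above for the bounded-global formats would
 * look like this.  unpack_res_index() below reverses the packing.
 */
static nir_def *
example_packed_res_index(nir_builder *b, nir_def *array_index)
{
   const uint32_t desc_stride = 32;   /* hypothetical descriptor stride */
   const uint32_t set_idx = 3;        /* hypothetical binding-table slot */
   const uint32_t dyn_offset = 0xff;  /* 0xff means "no dynamic offset" */
   const uint32_t packed = (desc_stride << 16) | (set_idx << 8) | dyn_offset;

   return nir_vec4(b, nir_imm_int(b, packed),
                      nir_imm_int(b, 64),     /* descriptor_offset */
                      nir_imm_int(b, 8 - 1),  /* array_size - 1, used as bound */
                      array_index);
}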
325 struct res_index_defs {
326    nir_def *set_idx;
327    nir_def *dyn_offset_base;
328    nir_def *desc_offset_base;
329    nir_def *array_index;
330    nir_def *desc_stride;
331 };
332 
333 static struct res_index_defs
334 unpack_res_index(nir_builder *b, nir_def *index)
335 {
336    struct res_index_defs defs;
337 
338    nir_def *packed = nir_channel(b, index, 0);
339    defs.desc_stride = nir_extract_u8(b, packed, nir_imm_int(b, 2));
340    defs.set_idx = nir_extract_u8(b, packed, nir_imm_int(b, 1));
341    defs.dyn_offset_base = nir_extract_u8(b, packed, nir_imm_int(b, 0));
342 
343    defs.desc_offset_base = nir_channel(b, index, 1);
344    defs.array_index = nir_umin(b, nir_channel(b, index, 2),
345                                   nir_channel(b, index, 3));
346 
347    return defs;
348 }
349 
350 /** Adjust a Vulkan resource index
351  *
352  * This is the equivalent of nir_deref_type_ptr_as_array for resource indices.
353  * For array descriptors, it allows us to adjust the array index.  Thanks to
354  * variable pointers, we cannot always fold this re-index operation into the
355  * vulkan_resource_index intrinsic and we have to do it based on nothing but
356  * the address format.
357  */
358 static nir_def *
359 build_res_reindex(nir_builder *b, nir_def *orig, nir_def *delta,
360                   nir_address_format addr_format)
361 {
362    switch (addr_format) {
363    case nir_address_format_64bit_global_32bit_offset:
364    case nir_address_format_64bit_bounded_global:
365       return nir_vec4(b, nir_channel(b, orig, 0),
366                          nir_channel(b, orig, 1),
367                          nir_channel(b, orig, 2),
368                          nir_iadd(b, nir_channel(b, orig, 3), delta));
369 
370    case nir_address_format_32bit_index_offset:
371       return nir_vec2(b, nir_iadd(b, nir_channel(b, orig, 0), delta),
372                          nir_channel(b, orig, 1));
373 
374    default:
375       unreachable("Unhandled address format");
376    }
377 }
378 
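/* Illustrative sketch (hypothetical helper, not part of the pass): under the
 * bounded-global formats a re-index only adds to the last channel, so two
 * chained re-index operations are equivalent to a single one with the summed
 * delta.
 */
static nir_def *
example_reindex_twice(nir_builder *b, nir_def *index,
                      nir_def *d0, nir_def *d1)
{
   nir_def *once = build_res_reindex(b, index, d0,
                                     nir_address_format_64bit_bounded_global);
   /* Same value as a single re-index by d0 + d1. */
   return build_res_reindex(b, once, d1,
                            nir_address_format_64bit_bounded_global);
}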
379 /** Get the address for a descriptor given its resource index
380  *
381  * Because of the re-indexing operations, we can't bounds check descriptor
382  * array access until we have the final index.  That means we end up doing the
383  * bounds check here, if needed.  See unpack_res_index() for more details.
384  *
385  * This function takes both a bind_layout and a desc_type which are used to
386  * determine the descriptor stride for array descriptors.  The bind_layout is
387  * optional for buffer descriptor types.
388  */
389 static nir_def *
390 build_desc_addr(nir_builder *b,
391                 const struct anv_descriptor_set_binding_layout *bind_layout,
392                 const VkDescriptorType desc_type,
393                 nir_def *index, nir_address_format addr_format,
394                 struct apply_pipeline_layout_state *state)
395 {
396    switch (addr_format) {
397    case nir_address_format_64bit_global_32bit_offset:
398    case nir_address_format_64bit_bounded_global: {
399       struct res_index_defs res = unpack_res_index(b, index);
400 
401       nir_def *desc_offset = res.desc_offset_base;
402       if (desc_type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
403          /* Compute the actual descriptor offset.  For inline uniform blocks,
404           * the array index is ignored as they are only allowed to be a single
405           * descriptor (not an array) and there is no concept of a "stride".
406           *
407           */
408          desc_offset =
409             nir_iadd(b, desc_offset, nir_imul(b, res.array_index, res.desc_stride));
410       }
411 
412       return nir_vec2(b, res.set_idx, desc_offset);
413    }
414 
415    case nir_address_format_32bit_index_offset:
416       assert(desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK);
417       return index;
418 
419    default:
420       unreachable("Unhandled address format");
421    }
422 }
423 
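/* Worked example (hypothetical numbers): with desc_offset_base 64,
 * desc_stride 32 and a clamped array_index of 5, the descriptor lives at
 * offset 64 + 5 * 32 = 224 bytes into the descriptor buffer selected by
 * set_idx, i.e. the vec2 returned above is (set_idx, 224).
 */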
424 /** Convert a Vulkan resource index into a buffer address
425  *
426  * In some cases, this does a memory load from the descriptor set and, in
427  * others, it simply converts from one form to another.
428  *
429  * See build_res_index for details about each resource index format.
430  */
431 static nir_def *
432 build_buffer_addr_for_res_index(nir_builder *b,
433                                 const VkDescriptorType desc_type,
434                                 nir_def *res_index,
435                                 nir_address_format addr_format,
436                                 struct apply_pipeline_layout_state *state)
437 {
438    if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
439       assert(addr_format == nir_address_format_32bit_index_offset);
440       return build_desc_addr(b, NULL, desc_type, res_index, addr_format, state);
441    } else if (addr_format == nir_address_format_32bit_index_offset) {
442       nir_def *array_index = nir_channel(b, res_index, 0);
443       nir_def *packed = nir_channel(b, res_index, 1);
444       nir_def *surface_index = nir_extract_u16(b, packed, nir_imm_int(b, 0));
445 
446       return nir_vec2(b, nir_iadd(b, surface_index, array_index),
447                          nir_imm_int(b, 0));
448    }
449 
450    nir_def *desc_addr =
451       build_desc_addr(b, NULL, desc_type, res_index, addr_format, state);
452 
453    nir_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 4, 32, state);
454 
455    if (state->has_dynamic_buffers) {
456       struct res_index_defs res = unpack_res_index(b, res_index);
457 
458       /* This shader has dynamic offsets and we have no way of knowing
459        * (apart from the dynamic offset base index) whether this buffer has a
460        * dynamic offset.
461        */
462       nir_def *dyn_offset_idx =
463          nir_iadd(b, res.dyn_offset_base, res.array_index);
464 
465       nir_def *dyn_load =
466          nir_load_push_constant(b, 1, 32, nir_imul_imm(b, dyn_offset_idx, 4),
467                                 .base = offsetof(struct anv_push_constants, dynamic_offsets),
468                                 .range = MAX_DYNAMIC_BUFFERS * 4);
469 
470       nir_def *dynamic_offset =
471          nir_bcsel(b, nir_ieq_imm(b, res.dyn_offset_base, 0xff),
472                       nir_imm_int(b, 0), dyn_load);
473 
474       /* The dynamic offset gets added to the base pointer so that we
475        * have a sliding window range.
476        */
477       nir_def *base_ptr =
478          nir_pack_64_2x32(b, nir_trim_vector(b, desc, 2));
479       base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
480       desc = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
481                          nir_unpack_64_2x32_split_y(b, base_ptr),
482                          nir_channel(b, desc, 2),
483                          nir_channel(b, desc, 3));
484    }
485 
486    /* The last element of the vec4 is always zero.
487     *
488     * See also struct anv_address_range_descriptor
489     */
490    return nir_vec4(b, nir_channel(b, desc, 0),
491                       nir_channel(b, desc, 1),
492                       nir_channel(b, desc, 2),
493                       nir_imm_int(b, 0));
494 }
495 
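/* Sketch of the result (assuming the 64-bit bounded-global format): the vec4
 * built above is laid out as
 *
 *    (base_addr_lo, base_addr_hi, size_in_bytes, 0)
 *
 * mirroring struct anv_address_range_descriptor, with any dynamic offset
 * already folded into the 64-bit base address.
 */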
496 /** Loads descriptor memory for a variable-based deref chain
497  *
498  * The deref chain has to terminate at a variable with a descriptor_set and
499  * binding set.  This is used for images, textures, and samplers.
500  */
501 static nir_def *
502 build_load_var_deref_descriptor_mem(nir_builder *b, nir_deref_instr *deref,
503                                     unsigned desc_offset,
504                                     unsigned num_components, unsigned bit_size,
505                                     struct apply_pipeline_layout_state *state)
506 {
507    nir_variable *var = nir_deref_instr_get_variable(deref);
508 
509    const uint32_t set = var->data.descriptor_set;
510    const uint32_t binding = var->data.binding;
511    const struct anv_descriptor_set_binding_layout *bind_layout =
512          &state->layout->set[set].layout->binding[binding];
513 
514    nir_def *array_index;
515    if (deref->deref_type != nir_deref_type_var) {
516       assert(deref->deref_type == nir_deref_type_array);
517       assert(nir_deref_instr_parent(deref)->deref_type == nir_deref_type_var);
518       array_index = deref->arr.index.ssa;
519    } else {
520       array_index = nir_imm_int(b, 0);
521    }
522 
523    /* It doesn't really matter what address format we choose as everything
524     * will constant-fold nicely.  Choose one that uses the actual descriptor
525     * buffer so we don't run into issues with index/offset assumptions.
526     */
527    const nir_address_format addr_format =
528       nir_address_format_64bit_bounded_global;
529 
530    nir_def *res_index =
531       build_res_index(b, set, binding, array_index, addr_format, state);
532 
533    nir_def *desc_addr =
534       build_desc_addr(b, bind_layout, bind_layout->type,
535                       res_index, addr_format, state);
536 
537    return build_load_descriptor_mem(b, desc_addr, desc_offset,
538                                     num_components, bit_size, state);
539 }
540 
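/* Usage sketch (hypothetical shader variable): for something like
 *
 *    layout(set = 0, binding = 1) uniform texture2D images[4];
 *
 * a deref chain of the form var -> array[i] is what gets passed here, e.g.
 * to read the first two dwords of that element's descriptor:
 *
 *    nir_def *desc =
 *       build_load_var_deref_descriptor_mem(b, deref, 0, 2, 32, state);
 */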
541 /** A recursive form of build_res_index()
542  *
543  * This recursively walks a resource [re]index chain and builds the resource
544  * index.  It places the new code with the resource [re]index operation in the
545  * hopes of better CSE.  This means the cursor is not where you left it when
546  * this function returns.
547  */
548 static nir_def *
549 build_res_index_for_chain(nir_builder *b, nir_intrinsic_instr *intrin,
550                           nir_address_format addr_format,
551                           uint32_t *set, uint32_t *binding,
552                           struct apply_pipeline_layout_state *state)
553 {
554    if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) {
555       b->cursor = nir_before_instr(&intrin->instr);
556       *set = nir_intrinsic_desc_set(intrin);
557       *binding = nir_intrinsic_binding(intrin);
558       return build_res_index(b, *set, *binding, intrin->src[0].ssa,
559                              addr_format, state);
560    } else {
561       assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex);
562       nir_intrinsic_instr *parent = nir_src_as_intrinsic(intrin->src[0]);
563       nir_def *index =
564          build_res_index_for_chain(b, parent, addr_format,
565                                    set, binding, state);
566 
567       b->cursor = nir_before_instr(&intrin->instr);
568 
569       return build_res_reindex(b, index, intrin->src[1].ssa, addr_format);
570    }
571 }
572 
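/* Sketch of the recursion (hypothetical chain):
 *
 *    a = vulkan_resource_index(set, binding, base)
 *    c = vulkan_resource_reindex(a, delta)
 *
 * The call recurses to `a`, emits build_res_index() right before `a`, then
 * returns to `c` and emits build_res_reindex() right before `c`, which is
 * why the cursor does not end up where the caller left it.
 */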
573 /** Builds a buffer address for a given vulkan [re]index intrinsic
574  *
575  * The cursor is not where you left it when this function returns.
576  */
577 static nir_def *
578 build_buffer_addr_for_idx_intrin(nir_builder *b,
579                                  nir_intrinsic_instr *idx_intrin,
580                                  nir_address_format addr_format,
581                                  struct apply_pipeline_layout_state *state)
582 {
583    uint32_t set = UINT32_MAX, binding = UINT32_MAX;
584    nir_def *res_index =
585       build_res_index_for_chain(b, idx_intrin, addr_format,
586                                 &set, &binding, state);
587 
588    const struct anv_descriptor_set_binding_layout *bind_layout =
589       &state->layout->set[set].layout->binding[binding];
590 
591    return build_buffer_addr_for_res_index(b, bind_layout->type,
592                                           res_index, addr_format, state);
593 }
594 
595 /** Builds a buffer address for deref chain
596  *
597  * This assumes that you can chase the chain all the way back to the original
598  * vulkan_resource_index intrinsic.
599  *
600  * The cursor is not where you left it when this function returns.
601  */
602 static nir_def *
603 build_buffer_addr_for_deref(nir_builder *b, nir_deref_instr *deref,
604                             nir_address_format addr_format,
605                             struct apply_pipeline_layout_state *state)
606 {
607    nir_deref_instr *parent = nir_deref_instr_parent(deref);
608    if (parent) {
609       nir_def *addr =
610          build_buffer_addr_for_deref(b, parent, addr_format, state);
611 
612       b->cursor = nir_before_instr(&deref->instr);
613       return nir_explicit_io_address_from_deref(b, deref, addr, addr_format);
614    }
615 
616    nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
617    assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);
618 
619    nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);
620 
621    b->cursor = nir_before_instr(&deref->instr);
622 
623    return build_buffer_addr_for_idx_intrin(b, idx_intrin, addr_format, state);
624 }
625 
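/* Sketch of the walk (hypothetical chain): for an access such as
 *
 *    cast   = deref_cast(load_vulkan_descriptor(vulkan_resource_index(...)))
 *    member = deref_struct(deref_array(cast, i), j)
 *    value  = load_deref(member)
 *
 * the recursion follows parent derefs back to the cast, builds the buffer
 * address from the [re]index chain, and then re-applies each deref on the
 * way back with nir_explicit_io_address_from_deref().
 */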
626 static bool
627 try_lower_direct_buffer_intrinsic(nir_builder *b,
628                                   nir_intrinsic_instr *intrin,
629                                   struct apply_pipeline_layout_state *state)
630 {
631    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
632    if (!nir_deref_mode_is_one_of(deref, nir_var_mem_ubo | nir_var_mem_ssbo))
633       return false;
634 
635    nir_intrinsic_instr *desc = nir_deref_find_descriptor(deref, state);
636    if (desc == NULL) {
637       /* We should always be able to find the descriptor for UBO access. */
638       assert(nir_deref_mode_is_one_of(deref, nir_var_mem_ssbo));
639       return false;
640    }
641 
642    nir_address_format addr_format = descriptor_address_format(desc, state);
643 
644    if (nir_deref_mode_is(deref, nir_var_mem_ssbo)) {
645       /* Normal binding table-based messages can't handle non-uniform access
646        * so we have to fall back to A64.
647        */
648       if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM)
649          return false;
650 
651       if (!descriptor_has_bti(desc, state))
652          return false;
653 
654       /* Rewrite to 32bit_index_offset whenever we can */
655       addr_format = nir_address_format_32bit_index_offset;
656    } else {
657       assert(nir_deref_mode_is(deref, nir_var_mem_ubo));
658 
659       /* Rewrite to 32bit_index_offset whenever we can */
660       if (descriptor_has_bti(desc, state))
661          addr_format = nir_address_format_32bit_index_offset;
662    }
663 
664    nir_def *addr =
665       build_buffer_addr_for_deref(b, deref, addr_format, state);
666 
667    b->cursor = nir_before_instr(&intrin->instr);
668    nir_lower_explicit_io_instr(b, intrin, addr, addr_format);
669 
670    return true;
671 }
672 
673 static bool
674 lower_load_accel_struct_desc(nir_builder *b,
675                              nir_intrinsic_instr *load_desc,
676                              struct apply_pipeline_layout_state *state)
677 {
678    assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);
679 
680    nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);
681 
682    /* It doesn't really matter what address format we choose as
683     * everything will constant-fold nicely.  Choose one that uses the
684     * actual descriptor buffer.
685     */
686    const nir_address_format addr_format =
687       nir_address_format_64bit_bounded_global;
688 
689    uint32_t set = UINT32_MAX, binding = UINT32_MAX;
690    nir_def *res_index =
691       build_res_index_for_chain(b, idx_intrin, addr_format,
692                                 &set, &binding, state);
693 
694    const struct anv_descriptor_set_binding_layout *bind_layout =
695       &state->layout->set[set].layout->binding[binding];
696 
697    b->cursor = nir_before_instr(&load_desc->instr);
698 
699    nir_def *desc_addr =
700       build_desc_addr(b, bind_layout, bind_layout->type,
701                       res_index, addr_format, state);
702 
703    /* Acceleration structure descriptors are always uint64_t */
704    nir_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 1, 64, state);
705 
706    assert(load_desc->def.bit_size == 64);
707    assert(load_desc->def.num_components == 1);
708    nir_def_rewrite_uses(&load_desc->def, desc);
709    nir_instr_remove(&load_desc->instr);
710 
711    return true;
712 }
713 
714 static bool
715 lower_direct_buffer_instr(nir_builder *b, nir_instr *instr, void *_state)
716 {
717    struct apply_pipeline_layout_state *state = _state;
718 
719    if (instr->type != nir_instr_type_intrinsic)
720       return false;
721 
722    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
723    switch (intrin->intrinsic) {
724    case nir_intrinsic_load_deref:
725    case nir_intrinsic_store_deref:
726    case nir_intrinsic_deref_atomic:
727    case nir_intrinsic_deref_atomic_swap:
728       return try_lower_direct_buffer_intrinsic(b, intrin, state);
729 
730    case nir_intrinsic_load_vulkan_descriptor:
731       if (nir_intrinsic_desc_type(intrin) ==
732           VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
733          return lower_load_accel_struct_desc(b, intrin, state);
734       return false;
735 
736    default:
737       return false;
738    }
739 }
740 
741 static bool
742 lower_res_index_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
743                           struct apply_pipeline_layout_state *state)
744 {
745    b->cursor = nir_before_instr(&intrin->instr);
746 
747    nir_address_format addr_format =
748       addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);
749 
750    nir_def *index =
751       build_res_index(b, nir_intrinsic_desc_set(intrin),
752                          nir_intrinsic_binding(intrin),
753                          intrin->src[0].ssa,
754                          addr_format, state);
755 
756    assert(intrin->def.bit_size == index->bit_size);
757    assert(intrin->def.num_components == index->num_components);
758    nir_def_rewrite_uses(&intrin->def, index);
759    nir_instr_remove(&intrin->instr);
760 
761    return true;
762 }
763 
764 static bool
765 lower_res_reindex_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
766                             struct apply_pipeline_layout_state *state)
767 {
768    b->cursor = nir_before_instr(&intrin->instr);
769 
770    nir_address_format addr_format =
771       addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);
772 
773    nir_def *index =
774       build_res_reindex(b, intrin->src[0].ssa,
775                            intrin->src[1].ssa,
776                            addr_format);
777 
778    assert(intrin->def.bit_size == index->bit_size);
779    assert(intrin->def.num_components == index->num_components);
780    nir_def_rewrite_uses(&intrin->def, index);
781    nir_instr_remove(&intrin->instr);
782 
783    return true;
784 }
785 
786 static bool
787 lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
788                              struct apply_pipeline_layout_state *state)
789 {
790    b->cursor = nir_before_instr(&intrin->instr);
791 
792    const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
793    nir_address_format addr_format = addr_format_for_desc_type(desc_type, state);
794 
795    nir_def *desc =
796       build_buffer_addr_for_res_index(b, desc_type, intrin->src[0].ssa,
797                                       addr_format, state);
798 
799    assert(intrin->def.bit_size == desc->bit_size);
800    assert(intrin->def.num_components == desc->num_components);
801    nir_def_rewrite_uses(&intrin->def, desc);
802    nir_instr_remove(&intrin->instr);
803 
804    return true;
805 }
806 
807 static bool
808 lower_get_ssbo_size(nir_builder *b, nir_intrinsic_instr *intrin,
809                     struct apply_pipeline_layout_state *state)
810 {
811    if (_mesa_set_search(state->lowered_instrs, intrin))
812       return false;
813 
814    b->cursor = nir_before_instr(&intrin->instr);
815 
816    nir_address_format addr_format =
817       addr_format_for_desc_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, state);
818 
819    nir_def *desc =
820       build_buffer_addr_for_res_index(b, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
821                                       intrin->src[0].ssa, addr_format, state);
822 
823    switch (addr_format) {
824    case nir_address_format_64bit_global_32bit_offset:
825    case nir_address_format_64bit_bounded_global: {
826       nir_def *size = nir_channel(b, desc, 2);
827       nir_def_rewrite_uses(&intrin->def, size);
828       nir_instr_remove(&intrin->instr);
829       break;
830    }
831 
832    case nir_address_format_32bit_index_offset:
833       /* The binding table index is the first component of the address.  The
834        * back-end wants a scalar binding table index source.
835        */
836       nir_src_rewrite(&intrin->src[0], nir_channel(b, desc, 0));
837       break;
838 
839    default:
840       unreachable("Unsupported address format");
841    }
842 
843    return true;
844 }
845 
846 static bool
847 image_binding_needs_lowered_surface(nir_variable *var)
848 {
849    return !(var->data.access & ACCESS_NON_READABLE) &&
850           var->data.image.format != PIPE_FORMAT_NONE;
851 }
852 
853 static bool
854 lower_image_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
855                       struct apply_pipeline_layout_state *state)
856 {
857    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
858    nir_variable *var = nir_deref_instr_get_variable(deref);
859 
860    unsigned set = var->data.descriptor_set;
861    unsigned binding = var->data.binding;
862    unsigned binding_offset = state->set[set].surface_offsets[binding];
863 
864    b->cursor = nir_before_instr(&intrin->instr);
865 
866    if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
867       b->cursor = nir_instr_remove(&intrin->instr);
868 
869       const unsigned param = nir_intrinsic_base(intrin);
870 
871       nir_def *desc =
872          build_load_var_deref_descriptor_mem(b, deref, param * 16,
873                                              intrin->def.num_components,
874                                              intrin->def.bit_size, state);
875 
876       nir_def_rewrite_uses(&intrin->def, desc);
877    } else {
878       nir_def *index = NULL;
879       if (deref->deref_type != nir_deref_type_var) {
880          assert(deref->deref_type == nir_deref_type_array);
881          index = deref->arr.index.ssa;
882       } else {
883          index = nir_imm_int(b, 0);
884       }
885 
886       index = nir_iadd_imm(b, index, binding_offset);
887       nir_rewrite_image_intrinsic(intrin, index, false);
888    }
889 
890    return true;
891 }
892 
893 static bool
894 lower_load_constant(nir_builder *b, nir_intrinsic_instr *intrin,
895                     struct apply_pipeline_layout_state *state)
896 {
897    b->cursor = nir_instr_remove(&intrin->instr);
898 
899    /* Any constant-offset load_constant instructions should have been removed
900     * by constant folding.
901     */
902    assert(!nir_src_is_const(intrin->src[0]));
903    nir_def *offset = nir_iadd_imm(b, intrin->src[0].ssa,
904                                       nir_intrinsic_base(intrin));
905 
906    nir_def *data;
907    if (!anv_use_relocations(state->pdevice)) {
908       unsigned load_size = intrin->def.num_components *
909                            intrin->def.bit_size / 8;
910       unsigned load_align = intrin->def.bit_size / 8;
911 
912       assert(load_size < b->shader->constant_data_size);
913       unsigned max_offset = b->shader->constant_data_size - load_size;
914       offset = nir_umin(b, offset, nir_imm_int(b, max_offset));
915 
916       nir_def *const_data_base_addr = nir_pack_64_2x32_split(b,
917          nir_load_reloc_const_intel(b, ELK_SHADER_RELOC_CONST_DATA_ADDR_LOW),
918          nir_load_reloc_const_intel(b, ELK_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
919 
920       data = nir_load_global_constant(b, nir_iadd(b, const_data_base_addr,
921                                                      nir_u2u64(b, offset)),
922                                       load_align,
923                                       intrin->def.num_components,
924                                       intrin->def.bit_size);
925    } else {
926       nir_def *index = nir_imm_int(b, state->constants_offset);
927 
928       data = nir_load_ubo(b, intrin->num_components, intrin->def.bit_size,
929                           index, offset,
930                           .align_mul = intrin->def.bit_size / 8,
931                           .align_offset =  0,
932                           .range_base = nir_intrinsic_base(intrin),
933                           .range = nir_intrinsic_range(intrin));
934    }
935 
936    nir_def_rewrite_uses(&intrin->def, data);
937 
938    return true;
939 }
940 
941 static bool
942 lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
943                         struct apply_pipeline_layout_state *state)
944 {
945    b->cursor = nir_instr_remove(&intrin->instr);
946 
947    nir_def *base_workgroup_id =
948       nir_load_push_constant(b, 3, 32, nir_imm_int(b, 0),
949                              .base = offsetof(struct anv_push_constants, cs.base_work_group_id),
950                              .range = 3 * sizeof(uint32_t));
951    nir_def_rewrite_uses(&intrin->def, base_workgroup_id);
952 
953    return true;
954 }
955 
956 static void
957 lower_tex_deref(nir_builder *b, nir_tex_instr *tex,
958                 nir_tex_src_type deref_src_type,
959                 unsigned *base_index, unsigned plane,
960                 struct apply_pipeline_layout_state *state)
961 {
962    int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
963    if (deref_src_idx < 0)
964       return;
965 
966    nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
967    nir_variable *var = nir_deref_instr_get_variable(deref);
968 
969    unsigned set = var->data.descriptor_set;
970    unsigned binding = var->data.binding;
971    unsigned array_size =
972       state->layout->set[set].layout->binding[binding].array_size;
973 
974    unsigned binding_offset;
975    if (deref_src_type == nir_tex_src_texture_deref) {
976       binding_offset = state->set[set].surface_offsets[binding];
977    } else {
978       assert(deref_src_type == nir_tex_src_sampler_deref);
979       binding_offset = state->set[set].sampler_offsets[binding];
980    }
981 
982    nir_tex_src_type offset_src_type;
983    nir_def *index = NULL;
984    if (binding_offset > MAX_BINDING_TABLE_SIZE) {
985       const unsigned plane_offset =
986          plane * sizeof(struct anv_sampled_image_descriptor);
987 
988       nir_def *desc =
989          build_load_var_deref_descriptor_mem(b, deref, plane_offset,
990                                              2, 32, state);
991 
992       if (deref_src_type == nir_tex_src_texture_deref) {
993          offset_src_type = nir_tex_src_texture_handle;
994          index = nir_channel(b, desc, 0);
995       } else {
996          assert(deref_src_type == nir_tex_src_sampler_deref);
997          offset_src_type = nir_tex_src_sampler_handle;
998          index = nir_channel(b, desc, 1);
999       }
1000    } else {
1001       if (deref_src_type == nir_tex_src_texture_deref) {
1002          offset_src_type = nir_tex_src_texture_offset;
1003       } else {
1004          assert(deref_src_type == nir_tex_src_sampler_deref);
1005          offset_src_type = nir_tex_src_sampler_offset;
1006       }
1007 
1008       *base_index = binding_offset + plane;
1009 
1010       if (deref->deref_type != nir_deref_type_var) {
1011          assert(deref->deref_type == nir_deref_type_array);
1012 
1013          if (nir_src_is_const(deref->arr.index)) {
1014             unsigned arr_index = MIN2(nir_src_as_uint(deref->arr.index), array_size - 1);
1015             struct anv_sampler **immutable_samplers =
1016                state->layout->set[set].layout->binding[binding].immutable_samplers;
1017             if (immutable_samplers) {
1018                /* Arrays of YCbCr samplers are tightly packed in the binding
1019                 * tables, so compute an element's offset in the table by
1020                 * adding up the plane counts of all preceding elements.
1021                 */
1022                unsigned desc_arr_index = 0;
1023                for (int i = 0; i < arr_index; i++)
1024                   desc_arr_index += immutable_samplers[i]->n_planes;
1025                *base_index += desc_arr_index;
1026             } else {
1027                *base_index += arr_index;
1028             }
1029          } else {
1030             /* From VK_KHR_sampler_ycbcr_conversion:
1031              *
1032              * If sampler Y’CBCR conversion is enabled, the combined image
1033              * sampler must be indexed only by constant integral expressions
1034              * when aggregated into arrays in shader code, irrespective of
1035              * the shaderSampledImageArrayDynamicIndexing feature.
1036              */
1037             assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1);
1038 
1039             index = deref->arr.index.ssa;
1040          }
1041       }
1042    }
1043 
1044    if (index) {
1045       nir_src_rewrite(&tex->src[deref_src_idx].src, index);
1046       tex->src[deref_src_idx].src_type = offset_src_type;
1047    } else {
1048       nir_tex_instr_remove_src(tex, deref_src_idx);
1049    }
1050 }
1051 
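/* Worked example (hypothetical immutable samplers): if a binding's immutable
 * samplers have n_planes = { 2, 1, 3 }, a constant array index of 2 yields
 * desc_arr_index = 2 + 1 = 3, i.e. three binding-table slots past the
 * binding offset, since each preceding element occupies one slot per plane.
 */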
1052 static uint32_t
1053 tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
1054 {
1055    int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
1056    if (plane_src_idx < 0)
1057       return 0;
1058 
1059    unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src);
1060 
1061    nir_tex_instr_remove_src(tex, plane_src_idx);
1062 
1063    return plane;
1064 }
1065 
1066 static nir_def *
1067 build_def_array_select(nir_builder *b, nir_def **srcs, nir_def *idx,
1068                        unsigned start, unsigned end)
1069 {
1070    if (start == end - 1) {
1071       return srcs[start];
1072    } else {
1073       unsigned mid = start + (end - start) / 2;
1074       return nir_bcsel(b, nir_ilt_imm(b, idx, mid),
1075                        build_def_array_select(b, srcs, idx, start, mid),
1076                        build_def_array_select(b, srcs, idx, mid, end));
1077    }
1078 }
1079 
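/* Sketch (hypothetical sources): selecting from srcs[0..3] with this helper
 * expands into a balanced bcsel tree rather than a linear chain:
 *
 *    bcsel(idx < 2, bcsel(idx < 1, srcs[0], srcs[1]),
 *                   bcsel(idx < 3, srcs[2], srcs[3]))
 *
 * i.e. O(log n) selects for an n-entry array.
 */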
1080 static void
1081 lower_gfx7_tex_swizzle(nir_builder *b, nir_tex_instr *tex, unsigned plane,
1082                        struct apply_pipeline_layout_state *state)
1083 {
1084    assert(state->pdevice->info.verx10 == 70);
1085    if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ||
1086        nir_tex_instr_is_query(tex) ||
1087        tex->op == nir_texop_tg4 || /* We can't swizzle TG4 */
1088        (tex->is_shadow && tex->is_new_style_shadow))
1089       return;
1090 
1091    int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
1092    assert(deref_src_idx >= 0);
1093 
1094    nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
1095    nir_variable *var = nir_deref_instr_get_variable(deref);
1096 
1097    unsigned set = var->data.descriptor_set;
1098    unsigned binding = var->data.binding;
1099    const struct anv_descriptor_set_binding_layout *bind_layout =
1100       &state->layout->set[set].layout->binding[binding];
1101 
1102    if ((bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) == 0)
1103       return;
1104 
1105    b->cursor = nir_before_instr(&tex->instr);
1106 
1107    const unsigned plane_offset =
1108       plane * sizeof(struct anv_texture_swizzle_descriptor);
1109    nir_def *swiz =
1110       build_load_var_deref_descriptor_mem(b, deref, plane_offset,
1111                                           1, 32, state);
1112 
1113    b->cursor = nir_after_instr(&tex->instr);
1114 
1115    assert(tex->def.bit_size == 32);
1116    assert(tex->def.num_components == 4);
1117 
1118    /* Initializing to undef is ok; nir_opt_undef will clean it up. */
1119    nir_def *undef = nir_undef(b, 1, 32);
1120    nir_def *comps[8];
1121    for (unsigned i = 0; i < ARRAY_SIZE(comps); i++)
1122       comps[i] = undef;
1123 
1124    comps[ISL_CHANNEL_SELECT_ZERO] = nir_imm_int(b, 0);
1125    if (nir_alu_type_get_base_type(tex->dest_type) == nir_type_float)
1126       comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_float(b, 1);
1127    else
1128       comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_int(b, 1);
1129    comps[ISL_CHANNEL_SELECT_RED] = nir_channel(b, &tex->def, 0);
1130    comps[ISL_CHANNEL_SELECT_GREEN] = nir_channel(b, &tex->def, 1);
1131    comps[ISL_CHANNEL_SELECT_BLUE] = nir_channel(b, &tex->def, 2);
1132    comps[ISL_CHANNEL_SELECT_ALPHA] = nir_channel(b, &tex->def, 3);
1133 
1134    nir_def *swiz_comps[4];
1135    for (unsigned i = 0; i < 4; i++) {
1136       nir_def *comp_swiz = nir_extract_u8(b, swiz, nir_imm_int(b, i));
1137       swiz_comps[i] = build_def_array_select(b, comps, comp_swiz, 0, 8);
1138    }
1139    nir_def *swiz_tex_res = nir_vec(b, swiz_comps, 4);
1140 
1141    /* Rewrite uses before we insert so we don't rewrite this use */
1142    nir_def_rewrite_uses_after(&tex->def,
1143                                   swiz_tex_res,
1144                                   swiz_tex_res->parent_instr);
1145 }
1146 
1147 static bool
1148 lower_tex(nir_builder *b, nir_tex_instr *tex,
1149           struct apply_pipeline_layout_state *state)
1150 {
1151    unsigned plane = tex_instr_get_and_remove_plane_src(tex);
1152 
1153    /* On Ivy Bridge and Bay Trail, we have to swizzle in the shader.  Do this
1154     * before we lower the derefs away so we can still find the descriptor.
1155     */
1156    if (state->pdevice->info.verx10 == 70)
1157       lower_gfx7_tex_swizzle(b, tex, plane, state);
1158 
1159    b->cursor = nir_before_instr(&tex->instr);
1160 
1161    lower_tex_deref(b, tex, nir_tex_src_texture_deref,
1162                    &tex->texture_index, plane, state);
1163 
1164    lower_tex_deref(b, tex, nir_tex_src_sampler_deref,
1165                    &tex->sampler_index, plane, state);
1166 
1167    return true;
1168 }
1169 
1170 static bool
1171 apply_pipeline_layout(nir_builder *b, nir_instr *instr, void *_state)
1172 {
1173    struct apply_pipeline_layout_state *state = _state;
1174 
1175    switch (instr->type) {
1176    case nir_instr_type_intrinsic: {
1177       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1178       switch (intrin->intrinsic) {
1179       case nir_intrinsic_vulkan_resource_index:
1180          return lower_res_index_intrinsic(b, intrin, state);
1181       case nir_intrinsic_vulkan_resource_reindex:
1182          return lower_res_reindex_intrinsic(b, intrin, state);
1183       case nir_intrinsic_load_vulkan_descriptor:
1184          return lower_load_vulkan_descriptor(b, intrin, state);
1185       case nir_intrinsic_get_ssbo_size:
1186          return lower_get_ssbo_size(b, intrin, state);
1187       case nir_intrinsic_image_deref_load:
1188       case nir_intrinsic_image_deref_store:
1189       case nir_intrinsic_image_deref_atomic:
1190       case nir_intrinsic_image_deref_atomic_swap:
1191       case nir_intrinsic_image_deref_size:
1192       case nir_intrinsic_image_deref_samples:
1193       case nir_intrinsic_image_deref_load_param_intel:
1194       case nir_intrinsic_image_deref_load_raw_intel:
1195       case nir_intrinsic_image_deref_store_raw_intel:
1196          return lower_image_intrinsic(b, intrin, state);
1197       case nir_intrinsic_load_constant:
1198          return lower_load_constant(b, intrin, state);
1199       case nir_intrinsic_load_base_workgroup_id:
1200          return lower_base_workgroup_id(b, intrin, state);
1201       default:
1202          return false;
1203       }
1204       break;
1205    }
1206    case nir_instr_type_tex:
1207       return lower_tex(b, nir_instr_as_tex(instr), state);
1208    default:
1209       return false;
1210    }
1211 }
1212 
1213 struct binding_info {
1214    uint32_t binding;
1215    uint8_t set;
1216    uint16_t score;
1217 };
1218 
1219 static int
1220 compare_binding_infos(const void *_a, const void *_b)
1221 {
1222    const struct binding_info *a = _a, *b = _b;
1223    if (a->score != b->score)
1224       return b->score - a->score;
1225 
1226    if (a->set != b->set)
1227       return a->set - b->set;
1228 
1229    return a->binding - b->binding;
1230 }
1231 
1232 void
1233 anv_nir_apply_pipeline_layout(nir_shader *shader,
1234                               const struct anv_physical_device *pdevice,
1235                               enum elk_robustness_flags robust_flags,
1236                               const struct anv_pipeline_layout *layout,
1237                               struct anv_pipeline_bind_map *map)
1238 {
1239    void *mem_ctx = ralloc_context(NULL);
1240 
1241    struct apply_pipeline_layout_state state = {
1242       .pdevice = pdevice,
1243       .layout = layout,
1244       .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_flags),
1245       .ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_flags),
1246       .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
1247    };
1248 
1249    for (unsigned s = 0; s < layout->num_sets; s++) {
1250       const unsigned count = layout->set[s].layout->binding_count;
1251       state.set[s].use_count = rzalloc_array(mem_ctx, uint8_t, count);
1252       state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
1253       state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
1254    }
1255 
1256    nir_shader_instructions_pass(shader, get_used_bindings,
1257                                 nir_metadata_all, &state);
1258 
1259    for (unsigned s = 0; s < layout->num_sets; s++) {
1260       if (state.set[s].desc_buffer_used) {
1261          map->surface_to_descriptor[map->surface_count] =
1262             (struct anv_pipeline_binding) {
1263                .set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
1264                .index = s,
1265             };
1266          state.set[s].desc_offset = map->surface_count;
1267          map->surface_count++;
1268       }
1269    }
1270 
1271    if (state.uses_constants && anv_use_relocations(pdevice)) {
1272       state.constants_offset = map->surface_count;
1273       map->surface_to_descriptor[map->surface_count].set =
1274          ANV_DESCRIPTOR_SET_SHADER_CONSTANTS;
1275       map->surface_count++;
1276    }
1277 
1278    unsigned used_binding_count = 0;
1279    for (uint32_t set = 0; set < layout->num_sets; set++) {
1280       struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
1281       for (unsigned b = 0; b < set_layout->binding_count; b++) {
1282          if (state.set[set].use_count[b] == 0)
1283             continue;
1284 
1285          used_binding_count++;
1286       }
1287    }
1288 
1289    struct binding_info *infos =
1290       rzalloc_array(mem_ctx, struct binding_info, used_binding_count);
1291    used_binding_count = 0;
1292    for (uint32_t set = 0; set < layout->num_sets; set++) {
1293       const struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
1294       for (unsigned b = 0; b < set_layout->binding_count; b++) {
1295          if (state.set[set].use_count[b] == 0)
1296             continue;
1297 
1298          const struct anv_descriptor_set_binding_layout *binding =
1299                &layout->set[set].layout->binding[b];
1300 
1301          /* Do a fixed-point calculation to generate a score based on the
1302           * number of uses and the binding array size.  We shift by 7 instead
1303           * of 8 because we're going to use the top bit below to make
1304           * everything which does not support bindless strictly higher
1305           * priority than things which do.
1306           */
1307          uint16_t score = ((uint16_t)state.set[set].use_count[b] << 7) /
1308                           binding->array_size;
1309 
         /* If the descriptor type doesn't support bindless then put it at the
          * beginning so we guarantee it gets a slot.
          */
         if (!anv_descriptor_supports_bindless(pdevice, binding, true) ||
             !anv_descriptor_supports_bindless(pdevice, binding, false))
            score |= 1 << 15;
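         /* For example, a binding used 3 times with array_size 4 scores
          * (3 << 7) / 4 = 96; if it cannot be bindless, bit 15 is also set
          * (0x8000 | 96), so it always sorts ahead of bindless-capable
          * bindings.
          */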

         infos[used_binding_count++] = (struct binding_info) {
            .set = set,
            .binding = b,
            .score = score,
         };
      }
   }

   /* Order the binding infos based on score, with the highest scores first.
    * If scores are equal, we then order by set and binding.
    */
   qsort(infos, used_binding_count, sizeof(struct binding_info),
         compare_binding_infos);

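   /* Walk the bindings in priority order and hand out binding table and
    * sampler table slots; samplers that don't fit in the table (or that
    * require bindless) are marked with BINDLESS_OFFSET instead.
    */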
   for (unsigned i = 0; i < used_binding_count; i++) {
      unsigned set = infos[i].set, b = infos[i].binding;
      const struct anv_descriptor_set_binding_layout *binding =
            &layout->set[set].layout->binding[b];

      const uint32_t array_size = binding->array_size;

      if (binding->dynamic_offset_index >= 0)
         state.has_dynamic_buffers = true;

      if (binding->data & ANV_DESCRIPTOR_SURFACE_STATE) {
         assert(map->surface_count + array_size <= MAX_BINDING_TABLE_SIZE);
         assert(!anv_descriptor_requires_bindless(pdevice, binding, false));
         state.set[set].surface_offsets[b] = map->surface_count;
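         /* Non-dynamic bindings get one surface entry per descriptor plane
          * (multi-planar immutable samplers need one entry per plane), while
          * dynamic buffers record their dynamic offset index instead.
          */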
         if (binding->dynamic_offset_index < 0) {
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->surface_to_descriptor[map->surface_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .index = binding->descriptor_index + i,
                        .plane = p,
                     };
               }
            }
         } else {
            for (unsigned i = 0; i < binding->array_size; i++) {
               map->surface_to_descriptor[map->surface_count++] =
                  (struct anv_pipeline_binding) {
                     .set = set,
                     .index = binding->descriptor_index + i,
                     .dynamic_offset_index =
                        layout->set[set].dynamic_offset_start +
                        binding->dynamic_offset_index + i,
                  };
            }
         }
         assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
      }

      if (binding->data & ANV_DESCRIPTOR_SAMPLER_STATE) {
         if (map->sampler_count + array_size > MAX_SAMPLER_TABLE_SIZE ||
             anv_descriptor_requires_bindless(pdevice, binding, true)) {
            /* If this descriptor doesn't fit in the sampler table or if it
             * requires bindless for some reason, flag it as bindless.
             *
             * We also make large sampler arrays bindless because we can avoid
             * using indirect sends thanks to bindless samplers being packed
             * less tightly than the sampler table.
             */
            assert(anv_descriptor_supports_bindless(pdevice, binding, true));
            state.set[set].sampler_offsets[b] = BINDLESS_OFFSET;
         } else {
            state.set[set].sampler_offsets[b] = map->sampler_count;
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->sampler_to_descriptor[map->sampler_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .index = binding->descriptor_index + i,
                        .plane = p,
                     };
               }
            }
         }
      }
   }

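   /* For each used storage image binding that landed in the binding table,
    * record whether it needs the lowered storage surface, as reported by
    * image_binding_needs_lowered_surface().
    */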
   nir_foreach_image_variable(var, shader) {
      const uint32_t set = var->data.descriptor_set;
      const uint32_t binding = var->data.binding;
      const struct anv_descriptor_set_binding_layout *bind_layout =
            &layout->set[set].layout->binding[binding];
      const uint32_t array_size = bind_layout->array_size;

      if (state.set[set].use_count[binding] == 0)
         continue;

      if (state.set[set].surface_offsets[binding] >= MAX_BINDING_TABLE_SIZE)
         continue;

      struct anv_pipeline_binding *pipe_binding =
         &map->surface_to_descriptor[state.set[set].surface_offsets[binding]];
      for (unsigned i = 0; i < array_size; i++) {
         assert(pipe_binding[i].set == set);
         assert(pipe_binding[i].index == bind_layout->descriptor_index + i);

         pipe_binding[i].lowered_storage_surface =
            image_binding_needs_lowered_surface(var);
      }
   }

   /* Before we do the normal lowering, we look for any SSBO operations
    * that we can lower to the BTI model and lower them up-front.  The BTI
    * model can perform better than the A64 model for a couple of reasons:
    *
    *  1. 48-bit address calculations are potentially expensive and using
    *     the BTI model lets us simply compute 32-bit offsets and the
    *     hardware adds the 64-bit surface base address.
    *
    *  2. The BTI messages, because they use surface states, do bounds
    *     checking for us.  With the A64 model, we have to do our own
    *     bounds checking and this means wider pointers and extra
    *     calculations and branching in the shader.
    *
    * The solution to both of these is to convert things to the BTI model
    * opportunistically.  We need to do this as a pre-pass for two reasons:
    *
    *  1. The BTI model requires nir_address_format_32bit_index_offset
    *     pointers which are not the same type as the pointers needed for
    *     the A64 model.  Because all our derefs are set up for the A64
    *     model (in case we have variable pointers), we have to crawl all
    *     the way back to the vulkan_resource_index intrinsic and build a
    *     completely fresh index+offset calculation.
    *
    *  2. Because the variable-pointers-capable lowering that we do as part
    *     of apply_pipeline_layout_block is destructive (it really has to
    *     be to handle variable pointers properly), we've lost the deref
    *     information by the time we get to the load/store/atomic
    *     intrinsics in that pass.
    */
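   /* Roughly speaking, an SSBO access whose binding can be resolved here goes
    * from something like
    *
    *    addr = load_vulkan_descriptor(...)   // 64-bit A64 pointer
    *    data = load_global(addr + offset)
    *
    * to the BTI form
    *
    *    data = load_ssbo(surface_index, offset)
    *
    * where the hardware applies the surface base address and does the bounds
    * checking for us.
    */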
   nir_shader_instructions_pass(shader, lower_direct_buffer_instr,
                                nir_metadata_block_index |
                                nir_metadata_dominance,
                                &state);

   /* We just got rid of all the direct access.  Delete it so it's not in the
    * way when we do our indirect lowering.
    */
   nir_opt_dce(shader);

   nir_shader_instructions_pass(shader, apply_pipeline_layout,
                                nir_metadata_block_index |
                                nir_metadata_dominance,
                                &state);

   ralloc_free(mem_ctx);

   /* Now that we're done computing the surface and sampler portions of the
    * bind map, hash them.  This lets us quickly determine if the actual
    * mapping has changed and not just a no-op pipeline change.
    */
   _mesa_sha1_compute(map->surface_to_descriptor,
                      map->surface_count * sizeof(struct anv_pipeline_binding),
                      map->surface_sha1);
   _mesa_sha1_compute(map->sampler_to_descriptor,
                      map->sampler_count * sizeof(struct anv_pipeline_binding),
                      map->sampler_sha1);
}