/*
 * Copyright © 2020 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */
#include "ac_descriptors.h"
#include "ac_shader_util.h"
#include "nir.h"
#include "nir_builder.h"
#include "radv_descriptor_set.h"
#include "radv_device.h"
#include "radv_nir.h"
#include "radv_physical_device.h"
#include "radv_shader.h"
#include "radv_shader_args.h"
#include "sid.h"

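/* State shared by the lowering helpers below: GPU generation, the high 32 bits
 * used to extend 32-bit descriptor addresses, workaround flags, and the
 * shader's argument/info/layout information.
 */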
typedef struct {
   enum amd_gfx_level gfx_level;
   uint32_t address32_hi;
   bool disable_aniso_single_level;
   bool has_image_load_dcc_bug;
   bool disable_tg4_trunc_coord;

   const struct radv_shader_args *args;
   const struct radv_shader_info *info;
   const struct radv_shader_layout *layout;
} apply_layout_state;

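/* Load a scalar shader argument (user SGPR) as an SSA value. */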
static nir_def *
get_scalar_arg(nir_builder *b, unsigned size, struct ac_arg arg)
{
   assert(arg.used);
   return nir_load_scalar_arg_amd(b, size, .base = arg.arg_index);
}

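/* Extend a 32-bit descriptor address to 64 bits using the known high bits. */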
static nir_def *
convert_pointer_to_64_bit(nir_builder *b, apply_layout_state *state, nir_def *ptr)
{
   return nir_pack_64_2x32_split(b, ptr, nir_imm_int(b, state->address32_hi));
}

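/* Return the 32-bit address of a descriptor set, either directly from a user
 * SGPR or by loading it from the indirect descriptor-sets table.
 */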
static nir_def *
load_desc_ptr(nir_builder *b, apply_layout_state *state, unsigned set)
{
   const struct radv_userdata_locations *user_sgprs_locs = &state->info->user_sgprs_locs;
   if (user_sgprs_locs->shader_data[AC_UD_INDIRECT_DESCRIPTOR_SETS].sgpr_idx != -1) {
      nir_def *addr = get_scalar_arg(b, 1, state->args->descriptor_sets[0]);
      addr = convert_pointer_to_64_bit(b, state, addr);
      return nir_load_smem_amd(b, 1, addr, nir_imm_int(b, set * 4));
   }

   assert(state->args->descriptor_sets[set].used);
   return get_scalar_arg(b, 1, state->args->descriptor_sets[set]);
}

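/* Lower vulkan_resource_index to a vec3 of (set address, binding offset, stride),
 * or to a packed 64-bit value for acceleration structures. Dynamic descriptors
 * are addressed relative to the push constant pointer, after the push constant data.
 */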
static void
visit_vulkan_resource_index(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
   unsigned desc_set = nir_intrinsic_desc_set(intrin);
   unsigned binding = nir_intrinsic_binding(intrin);
   struct radv_descriptor_set_layout *layout = state->layout->set[desc_set].layout;
   unsigned offset = layout->binding[binding].offset;
   unsigned stride;

   nir_def *set_ptr;
   if (vk_descriptor_type_is_dynamic(layout->binding[binding].type)) {
      unsigned idx = state->layout->set[desc_set].dynamic_offset_start + layout->binding[binding].dynamic_offset_offset;
      set_ptr = get_scalar_arg(b, 1, state->args->ac.push_constants);
      offset = state->layout->push_constant_size + idx * 16;
      stride = 16;
   } else {
      set_ptr = load_desc_ptr(b, state, desc_set);
      stride = layout->binding[binding].size;
   }

   nir_def *binding_ptr = nir_imul_imm(b, intrin->src[0].ssa, stride);
   nir_instr_as_alu(binding_ptr->parent_instr)->no_unsigned_wrap = true;

   binding_ptr = nir_iadd_imm(b, binding_ptr, offset);
   nir_instr_as_alu(binding_ptr->parent_instr)->no_unsigned_wrap = true;

   if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
      assert(stride == 16);
      nir_def_rewrite_uses(&intrin->def, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
   } else {
      nir_def_rewrite_uses(&intrin->def, nir_vec3(b, set_ptr, binding_ptr, nir_imm_int(b, stride)));
   }
   nir_instr_remove(&intrin->instr);
}

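/* Lower vulkan_resource_reindex by advancing the binding offset by
 * index * stride (a fixed 16-byte stride for acceleration structures).
 */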
static void
visit_vulkan_resource_reindex(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
   VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
   if (desc_type == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
      nir_def *set_ptr = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
      nir_def *binding_ptr = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);

      nir_def *index = nir_imul_imm(b, intrin->src[1].ssa, 16);
      nir_instr_as_alu(index->parent_instr)->no_unsigned_wrap = true;

      binding_ptr = nir_iadd_nuw(b, binding_ptr, index);

      nir_def_rewrite_uses(&intrin->def, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
   } else {
      assert(desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

      nir_def *binding_ptr = nir_channel(b, intrin->src[0].ssa, 1);
      nir_def *stride = nir_channel(b, intrin->src[0].ssa, 2);

      nir_def *index = nir_imul(b, intrin->src[1].ssa, stride);
      nir_instr_as_alu(index->parent_instr)->no_unsigned_wrap = true;

      binding_ptr = nir_iadd_nuw(b, binding_ptr, index);

      nir_def_rewrite_uses(&intrin->def, nir_vector_insert_imm(b, intrin->src[0].ssa, binding_ptr, 1));
   }
   nir_instr_remove(&intrin->instr);
}

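/* Lower load_vulkan_descriptor: acceleration structures load the 64-bit address
 * from memory, while buffers just zero the stride component of the vec3.
 */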
static void
visit_load_vulkan_descriptor(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
   if (nir_intrinsic_desc_type(intrin) == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
      nir_def *addr = convert_pointer_to_64_bit(b, state,
                                                nir_iadd(b, nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa),
                                                         nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa)));
      nir_def *desc = nir_build_load_global(b, 1, 64, addr, .access = ACCESS_NON_WRITEABLE);

      nir_def_rewrite_uses(&intrin->def, desc);
   } else {
      nir_def_rewrite_uses(&intrin->def, nir_vector_insert_imm(b, intrin->src[0].ssa, nir_imm_int(b, 0), 2));
   }
   nir_instr_remove(&intrin->instr);
}

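/* Build a buffer descriptor for an inline uniform block: word 0 is the 32-bit
 * address within the descriptor set, the remaining words come from a default
 * raw buffer descriptor with the maximum range.
 */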
static nir_def *
load_inline_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_def *rsrc)
{
   uint32_t desc[4];

   ac_build_raw_buffer_descriptor(state->gfx_level, (uint64_t)state->address32_hi << 32, 0xffffffff, desc);

   return nir_vec4(b, rsrc, nir_imm_int(b, desc[1]), nir_imm_int(b, desc[2]), nir_imm_int(b, desc[3]));
}

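/* Convert a lowered (set address, offset, stride) value into either an inline
 * uniform block descriptor, a plain 32-bit descriptor address for non-uniform
 * access, or the buffer descriptor itself loaded through SMEM.
 */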
static nir_def *
load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_def *rsrc, unsigned access)
{
   nir_binding binding = nir_chase_binding(nir_src_for_ssa(rsrc));

   /* If binding.success=false, then this is a variable pointer, which we don't support with
    * VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK.
    */
   if (binding.success) {
      struct radv_descriptor_set_layout *layout = state->layout->set[binding.desc_set].layout;
      if (layout->binding[binding.binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
         rsrc = nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));
         return load_inline_buffer_descriptor(b, state, rsrc);
      }
   }

   if (access & ACCESS_NON_UNIFORM)
      return nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));

   nir_def *desc_set = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0));
   return nir_load_smem_amd(b, 4, desc_set, nir_channel(b, rsrc, 1), .align_mul = 16);
}

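/* Lower get_ssbo_size by reading the size (third dword) of the buffer descriptor. */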
static void
visit_get_ssbo_size(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
   nir_def *rsrc = intrin->src[0].ssa;

   nir_def *size;
   if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM) {
      nir_def *ptr = nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));
      ptr = nir_iadd_imm(b, ptr, 8);
      ptr = convert_pointer_to_64_bit(b, state, ptr);
      size = nir_build_load_global(b, 4, 32, ptr, .access = ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER, .align_mul = 16,
                                   .align_offset = 4);
   } else {
      /* load the entire descriptor so it can be CSE'd */
      nir_def *ptr = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0));
      nir_def *desc = nir_load_smem_amd(b, 4, ptr, nir_channel(b, rsrc, 1), .align_mul = 16);
      size = nir_channel(b, desc, 2);
   }

   nir_def_replace(&intrin->def, size);
}

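/* Load an image, sampler or buffer descriptor for the given variable deref.
 * Immutable samplers are inlined as constants when possible. For non-uniform
 * access this returns the 32-bit descriptor address instead of the descriptor.
 */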
static nir_def *
get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *deref, enum ac_descriptor_type desc_type,
                 bool non_uniform, nir_tex_instr *tex, bool write)
{
   nir_variable *var = nir_deref_instr_get_variable(deref);
   assert(var);
   unsigned desc_set = var->data.descriptor_set;
   unsigned binding_index = var->data.binding;
   bool indirect = nir_deref_instr_has_indirect(deref);

   struct radv_descriptor_set_layout *layout = state->layout->set[desc_set].layout;
   struct radv_descriptor_set_binding_layout *binding = &layout->binding[binding_index];

   /* Handle immutable and embedded (compile-time) samplers
    * (VkDescriptorSetLayoutBinding::pImmutableSamplers). We can only do this for a constant array
    * index or if all samplers in the array are the same. Note that indexing is forbidden with
    * embedded samplers.
    */
   if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
       (!indirect || binding->immutable_samplers_equal)) {
      unsigned constant_index = 0;
      if (!binding->immutable_samplers_equal) {
         while (deref->deref_type != nir_deref_type_var) {
            assert(deref->deref_type == nir_deref_type_array);
            unsigned array_size = MAX2(glsl_get_aoa_size(deref->type), 1);
            constant_index += nir_src_as_uint(deref->arr.index) * array_size;
            deref = nir_deref_instr_parent(deref);
         }
      }

      uint32_t dword0_mask =
         tex->op == nir_texop_tg4 && state->disable_tg4_trunc_coord ? C_008F30_TRUNC_COORD : 0xffffffffu;
      const uint32_t *samplers = radv_immutable_samplers(layout, binding);
      return nir_imm_ivec4(b, samplers[constant_index * 4 + 0] & dword0_mask, samplers[constant_index * 4 + 1],
                           samplers[constant_index * 4 + 2], samplers[constant_index * 4 + 3]);
   }

   unsigned size = 8;
   unsigned offset = binding->offset;
   switch (desc_type) {
   case AC_DESC_IMAGE:
   case AC_DESC_PLANE_0:
      break;
   case AC_DESC_FMASK:
   case AC_DESC_PLANE_1:
      offset += 32;
      break;
   case AC_DESC_SAMPLER:
      size = 4;
      if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
         offset += radv_combined_image_descriptor_sampler_offset(binding);
      break;
   case AC_DESC_BUFFER:
      size = 4;
      break;
   case AC_DESC_PLANE_2:
      size = 4;
      offset += 64;
      break;
   }

   nir_def *index = NULL;
   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);
      unsigned array_size = MAX2(glsl_get_aoa_size(deref->type), 1);
      array_size *= binding->size;

      nir_def *tmp = nir_imul_imm(b, deref->arr.index.ssa, array_size);
      if (tmp != deref->arr.index.ssa)
         nir_instr_as_alu(tmp->parent_instr)->no_unsigned_wrap = true;

      if (index) {
         index = nir_iadd(b, tmp, index);
         nir_instr_as_alu(index->parent_instr)->no_unsigned_wrap = true;
      } else {
         index = tmp;
      }

      deref = nir_deref_instr_parent(deref);
   }

   nir_def *index_offset = index ? nir_iadd_imm(b, index, offset) : nir_imm_int(b, offset);
   if (index && index_offset != index)
      nir_instr_as_alu(index_offset->parent_instr)->no_unsigned_wrap = true;

   if (non_uniform)
      return nir_iadd(b, load_desc_ptr(b, state, desc_set), index_offset);

   nir_def *addr = convert_pointer_to_64_bit(b, state, load_desc_ptr(b, state, desc_set));
   nir_def *desc = nir_load_smem_amd(b, size, addr, index_offset, .align_mul = size * 4u);

   /* 3-plane formats always have the same size and format for planes 1 and 2, so
    * use the tail from plane 1 so that we can store only the first 16 bytes
    * of the last plane. */
   if (desc_type == AC_DESC_PLANE_2) {
      nir_def *desc2 = get_sampler_desc(b, state, deref, AC_DESC_PLANE_1, non_uniform, tex, write);

      nir_def *comp[8];
      for (unsigned i = 0; i < 4; i++)
         comp[i] = nir_channel(b, desc, i);
      for (unsigned i = 4; i < 8; i++)
         comp[i] = nir_channel(b, desc2, i);

      return nir_vec(b, comp, 8);
   } else if (desc_type == AC_DESC_IMAGE && state->has_image_load_dcc_bug && !tex && !write) {
      nir_def *comp[8];
      for (unsigned i = 0; i < 8; i++)
         comp[i] = nir_channel(b, desc, i);

      /* WRITE_COMPRESS_ENABLE must be 0 for all image loads to work around a
       * hardware bug.
       */
      comp[6] = nir_iand_imm(b, comp[6], C_00A018_WRITE_COMPRESS_ENABLE);

      return nir_vec(b, comp, 8);
   } else if (desc_type == AC_DESC_SAMPLER && tex->op == nir_texop_tg4 && state->disable_tg4_trunc_coord) {
      nir_def *comp[4];
      for (unsigned i = 0; i < 4; i++)
         comp[i] = nir_channel(b, desc, i);

      /* We want to always use the linear filtering truncation behaviour for
       * nir_texop_tg4, even if the sampler uses nearest/point filtering.
       */
      comp[0] = nir_iand_imm(b, comp[0], C_008F30_TRUNC_COORD);

      return nir_vec(b, comp, 4);
   }

   return desc;
}

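/* Replace the image deref of an image intrinsic with its loaded descriptor. */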
static void
update_image_intrinsic(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   const enum glsl_sampler_dim dim = glsl_get_sampler_dim(deref->type);
   bool is_load =
      intrin->intrinsic == nir_intrinsic_image_deref_load || intrin->intrinsic == nir_intrinsic_image_deref_sparse_load;

   nir_def *desc = get_sampler_desc(b, state, deref, dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE,
                                    nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM, NULL, !is_load);

   if (intrin->intrinsic == nir_intrinsic_image_deref_descriptor_amd) {
      nir_def_replace(&intrin->def, desc);
   } else {
      nir_rewrite_image_intrinsic(intrin, desc, true);
   }
}

static bool
can_increase_load_size(nir_intrinsic_instr *intrin, unsigned offset, unsigned old, unsigned new)
{
   /* Only increase the size of loads if doing so won't extend into a new page/cache-line. */
   unsigned align_mul = MIN2(nir_intrinsic_align_mul(intrin), 64u);
   unsigned end = (nir_intrinsic_align_offset(intrin) + offset + old) & (align_mul - 1);
   return (new - old) <= (align_mul - end);
}

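/* Lower load_push_constant: dwords covered by inline_push_const_mask come from
 * user SGPRs, everything else is loaded from the push constant buffer via SMEM.
 */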
static nir_def *
load_push_constant(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
   unsigned base = nir_intrinsic_base(intrin);
   unsigned bit_size = intrin->def.bit_size;
   unsigned count = intrin->def.num_components * (bit_size / 32u);
   assert(bit_size >= 32);

   nir_def *addr = NULL;
   nir_def *offset = NULL;
   unsigned const_offset = -1;
   if (nir_src_is_const(intrin->src[0]))
      const_offset = (base + nir_src_as_uint(intrin->src[0])) / 4u;

   const unsigned max_push_constant = sizeof(state->args->ac.inline_push_const_mask) * 8u;

   nir_component_mask_t comps_read = nir_def_components_read(&intrin->def);

   nir_def *data[NIR_MAX_VEC_COMPONENTS * 2];
   unsigned num_loads = 0;
   for (unsigned start = 0; start < count;) {
      if (!(comps_read & BITFIELD64_BIT(start >> (bit_size == 64 ? 1 : 0)))) {
         data[num_loads++] = nir_undef(b, 1, 32);
         start += 1;
         continue;
      }

      /* Try to use inline push constants when possible. */
      unsigned inline_idx = const_offset + start;
      if (const_offset != -1 && inline_idx < max_push_constant &&
          (state->args->ac.inline_push_const_mask & BITFIELD64_BIT(inline_idx))) {
         inline_idx = util_bitcount64(state->args->ac.inline_push_const_mask & BITFIELD64_MASK(inline_idx));
         data[num_loads++] = get_scalar_arg(b, 1, state->args->ac.inline_push_consts[inline_idx]);
         start += 1;
         continue;
      }

      if (!state->args->ac.push_constants.used) {
         /* Assume this is an inlined push constant load which was expanded to include dwords which are not inlined. */
         assert(const_offset != -1);
         data[num_loads++] = nir_undef(b, 1, 32);
         start += 1;
         continue;
      }

      if (!offset) {
         addr = get_scalar_arg(b, 1, state->args->ac.push_constants);
         addr = convert_pointer_to_64_bit(b, state, addr);
         offset = nir_iadd_imm_nuw(b, intrin->src[0].ssa, base);
      }
      unsigned size = 1 << (util_last_bit(count - start) - 1); /* Round down to power of two. */
      /* Try to round up to power of two instead. */
      if (size < (count - start) && can_increase_load_size(intrin, start * 4, size, size * 2))
         size *= 2;

      data[num_loads++] = nir_load_smem_amd(b, size, addr, nir_iadd_imm_nuw(b, offset, start * 4));
      start += size;
   }
   return nir_extract_bits(b, data, num_loads, 0, intrin->def.num_components, bit_size);
}

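/* Dispatch the lowering for a single descriptor-related intrinsic. */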
static void
apply_layout_to_intrin(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_def *rsrc;
   switch (intrin->intrinsic) {
   case nir_intrinsic_vulkan_resource_index:
      visit_vulkan_resource_index(b, state, intrin);
      break;
   case nir_intrinsic_vulkan_resource_reindex:
      visit_vulkan_resource_reindex(b, state, intrin);
      break;
   case nir_intrinsic_load_vulkan_descriptor:
      visit_load_vulkan_descriptor(b, state, intrin);
      break;
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_ssbo_atomic:
   case nir_intrinsic_ssbo_atomic_swap:
      rsrc = load_buffer_descriptor(b, state, intrin->src[0].ssa, nir_intrinsic_access(intrin));
      nir_src_rewrite(&intrin->src[0], rsrc);
      break;
   case nir_intrinsic_store_ssbo:
      rsrc = load_buffer_descriptor(b, state, intrin->src[1].ssa, nir_intrinsic_access(intrin));
      nir_src_rewrite(&intrin->src[1], rsrc);
      break;
   case nir_intrinsic_get_ssbo_size:
      visit_get_ssbo_size(b, state, intrin);
      break;
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic:
   case nir_intrinsic_image_deref_atomic_swap:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_image_deref_descriptor_amd:
      update_image_intrinsic(b, state, intrin);
      break;
   case nir_intrinsic_load_push_constant: {
      nir_def_replace(&intrin->def, load_push_constant(b, state, intrin));
      break;
   }
   default:
      break;
   }
}

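/* Lower a texture instruction: replace texture/sampler derefs with descriptor
 * handles and apply sampler workarounds that depend on the image descriptor.
 */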
static void
apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_deref_instr *texture_deref_instr = NULL;
   nir_deref_instr *sampler_deref_instr = NULL;
   int plane = -1;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_texture_deref:
         texture_deref_instr = nir_src_as_deref(tex->src[i].src);
         break;
      case nir_tex_src_sampler_deref:
         sampler_deref_instr = nir_src_as_deref(tex->src[i].src);
         break;
      case nir_tex_src_plane:
         plane = nir_src_as_int(tex->src[i].src);
         break;
      default:
         break;
      }
   }

   nir_def *image = NULL;
   nir_def *sampler = NULL;
   if (plane >= 0) {
      assert(tex->op != nir_texop_txf_ms && tex->op != nir_texop_samples_identical);
      assert(tex->sampler_dim != GLSL_SAMPLER_DIM_BUF);
      image =
         get_sampler_desc(b, state, texture_deref_instr, AC_DESC_PLANE_0 + plane, tex->texture_non_uniform, tex, false);
   } else if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
      image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_BUFFER, tex->texture_non_uniform, tex, false);
   } else if (tex->op == nir_texop_fragment_mask_fetch_amd) {
      image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_FMASK, tex->texture_non_uniform, tex, false);
   } else {
      image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_IMAGE, tex->texture_non_uniform, tex, false);
   }

   if (sampler_deref_instr) {
      sampler = get_sampler_desc(b, state, sampler_deref_instr, AC_DESC_SAMPLER, tex->sampler_non_uniform, tex, false);

      if (state->disable_aniso_single_level && tex->sampler_dim < GLSL_SAMPLER_DIM_RECT && state->gfx_level < GFX8) {
         /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
          *
          * GFX6-GFX7:
          *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
          *   filtering manually. The driver sets img7 to a mask clearing
          *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
          *     s_and_b32 samp0, samp0, img7
          *
          * GFX8:
          *   The ANISO_OVERRIDE sampler field enables this fix in TA.
          */
         /* TODO: This is unnecessary for combined image+sampler.
          * We can do this when updating the desc set. */
         nir_def *comp[4];
         for (unsigned i = 0; i < 4; i++)
            comp[i] = nir_channel(b, sampler, i);
         comp[0] = nir_iand(b, comp[0], nir_channel(b, image, 7));

         sampler = nir_vec(b, comp, 4);
      }
   }

   if (tex->op == nir_texop_descriptor_amd) {
      nir_def_replace(&tex->def, image);
      return;
   }

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_texture_deref:
         tex->src[i].src_type = nir_tex_src_texture_handle;
         nir_src_rewrite(&tex->src[i].src, image);
         break;
      case nir_tex_src_sampler_deref:
         tex->src[i].src_type = nir_tex_src_sampler_handle;
         nir_src_rewrite(&tex->src[i].src, sampler);
         break;
      default:
         break;
      }
   }
}

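/* Entry point: lower all Vulkan descriptor, push constant and texture accesses
 * in the shader according to the pipeline layout and shader arguments.
 */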
void
radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device, const struct radv_shader_stage *stage)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);

   apply_layout_state state = {
      .gfx_level = pdev->info.gfx_level,
      .address32_hi = pdev->info.address32_hi,
      .disable_aniso_single_level = instance->drirc.disable_aniso_single_level,
      .has_image_load_dcc_bug = pdev->info.has_image_load_dcc_bug,
      .disable_tg4_trunc_coord = !pdev->info.conformant_trunc_coord && !device->disable_trunc_coord,
      .args = &stage->args,
      .info = &stage->info,
      .layout = &stage->layout,
   };

   nir_builder b;

   nir_foreach_function (function, shader) {
      if (!function->impl)
         continue;

      b = nir_builder_create(function->impl);

      /* Iterate in reverse so load_ubo lowering can look at
       * the vulkan_resource_index to tell if it's an inline
       * ubo.
       */
      nir_foreach_block_reverse (block, function->impl) {
         nir_foreach_instr_reverse_safe (instr, block) {
            if (instr->type == nir_instr_type_tex)
               apply_layout_to_tex(&b, &state, nir_instr_as_tex(instr));
            else if (instr->type == nir_instr_type_intrinsic)
               apply_layout_to_intrin(&b, &state, nir_instr_as_intrinsic(instr));
         }
      }

      nir_metadata_preserve(function->impl, nir_metadata_control_flow);
   }
}