/*
 * Copyright 2023 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#include "asahi/compiler/agx_nir.h"
#include "compiler/nir/nir_builder.h"
#include "util/bitset.h"
#include "agx_state.h"
#include "nir.h"
#include "nir_builder_opcodes.h"
#include "nir_intrinsics.h"
#include "nir_intrinsics_indices.h"

/*
 * Lower binding table textures and images to texture state registers and (if
 * necessary) bindless access into an internal table mapped like additional
 * texture state registers. The following layout is used:
 *
 *    1. Textures
 *    2. Images (read/write interleaved)
 */

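/*
 * For illustration (derived from the remapping in lower() below): if the
 * highest texture slot used is 2, images start at index 3, so image i maps to
 *
 *    3 + 2*i       texture descriptor (loads and size/samples queries)
 *    3 + 2*i + 1   PBE descriptor (stores and atomics)
 *
 * Accesses that provably fit in the texture state registers stay direct;
 * everything else is rewritten to go through nir_load_texture_handle_agx.
 */
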
static bool
lower_sampler(nir_builder *b, nir_tex_instr *tex)
{
   if (!nir_tex_instr_need_sampler(tex))
      return false;

   nir_def *index = nir_steal_tex_src(tex, nir_tex_src_sampler_offset);
   if (!index)
      index = nir_imm_int(b, tex->sampler_index);

   nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle,
                         nir_load_sampler_handle_agx(b, index));
   return true;
}

static bool
lower(nir_builder *b, nir_instr *instr, void *data)
{
   bool *uses_bindless_samplers = data;
   bool progress = false;
   bool force_bindless = agx_nir_needs_texture_crawl(instr);
   b->cursor = nir_before_instr(instr);

   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      nir_intrinsic_op bindless_op;

#define CASE(op)                                                               \
   case nir_intrinsic_##op:                                                    \
      bindless_op = nir_intrinsic_bindless_##op;                               \
      break;

      switch (intr->intrinsic) {
         CASE(image_load)
         CASE(image_store)
         CASE(image_size)
         CASE(image_samples)
         CASE(image_atomic)
         CASE(image_atomic_swap)
      default:
         return false;
      }
#undef CASE

      nir_def *index = intr->src[0].ssa;
      nir_scalar index_scalar = nir_scalar_resolved(index, 0);

      /* Remap according to the driver layout */
      unsigned offset = BITSET_LAST_BIT(b->shader->info.textures_used);

      /* For reads and queries, we use the texture descriptor which is first.
       * Writes and atomics use the PBE descriptor.
       */
      if (intr->intrinsic != nir_intrinsic_image_load &&
          intr->intrinsic != nir_intrinsic_image_size &&
          intr->intrinsic != nir_intrinsic_image_samples)
         offset++;

      /* If we can determine statically that the image fits in texture state
       * registers, avoid lowering to bindless access.
       */
      if (nir_scalar_is_const(index_scalar) && !force_bindless) {
         unsigned idx = (nir_scalar_as_uint(index_scalar) * 2) + offset;

         if (idx < AGX_NUM_TEXTURE_STATE_REGS) {
            nir_src_rewrite(&intr->src[0], nir_imm_intN_t(b, idx, 16));
            return true;
         }
      }

      nir_atomic_op op = nir_atomic_op_iadd /* irrelevant */;
      if (nir_intrinsic_has_atomic_op(intr))
         op = nir_intrinsic_atomic_op(intr);

      /* Otherwise, lower to bindless */
      intr->intrinsic = bindless_op;

      if (nir_intrinsic_has_atomic_op(intr))
         nir_intrinsic_set_atomic_op(intr, op);

      /* The driver uploads enough null texture/PBE descriptors for robustness
       * given the shader limit, but we still need to clamp since we're lowering
       * to bindless so the hardware doesn't know the limit.
       *
       * The GL spec says out-of-bounds image indexing is undefined, but
       * faulting is not acceptable for robustness.
       */
      index = nir_umin(
         b, index,
         nir_imm_intN_t(b, b->shader->info.num_images - 1, index->bit_size));

      index = nir_iadd_imm(b, nir_imul_imm(b, index, 2), offset);
      nir_src_rewrite(&intr->src[0], nir_load_texture_handle_agx(b, index));
   } else if (instr->type == nir_instr_type_tex) {
      nir_tex_instr *tex = nir_instr_as_tex(instr);

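      /* The check below assumes at most 16 directly addressed sampler state
       * registers: when the shader uses more than 16 samplers and this access
       * is dynamically indexed or sits beyond the first 16, switch it to a
       * bindless sampler handle instead.
       */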
      if (((BITSET_COUNT(b->shader->info.samplers_used) > 16) &&
           (nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset) >= 0 ||
            tex->sampler_index >= 16)) &&
          lower_sampler(b, tex)) {
         progress = true;
         *uses_bindless_samplers = true;
      }

      /* Nothing to do for "real" bindless */
      if (nir_tex_instr_src_index(tex, nir_tex_src_texture_handle) >= 0)
         return progress;

      /* Textures are mapped 1:1, so if we can prove it fits in a texture state
       * register, use the texture state register.
       */
      if (tex->texture_index < AGX_NUM_TEXTURE_STATE_REGS &&
          nir_tex_instr_src_index(tex, nir_tex_src_texture_offset) == -1 &&
          !force_bindless)
         return progress;

      /* Otherwise, lower to bindless. Could be optimized. */
      nir_def *index = nir_steal_tex_src(tex, nir_tex_src_texture_offset);
      if (!index)
         index = nir_imm_int(b, tex->texture_index);

      /* As above */
      index = nir_umin(
         b, index,
         nir_imm_intN_t(b, b->shader->info.num_textures - 1, index->bit_size));

      nir_tex_instr_add_src(tex, nir_tex_src_texture_handle,
                            nir_load_texture_handle_agx(b, index));
   }

   return true;
}

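/*
 * Entry point for the pass. A minimal usage sketch (the call site shown is
 * hypothetical, not the driver's actual code):
 *
 *    bool uses_bindless_samplers = false;
 *    NIR_PASS(progress, nir, agx_nir_lower_bindings, &uses_bindless_samplers);
 *
 * The out-parameter tells the caller whether bindless sampler handles are
 * required, so it can set up the corresponding sampler state.
 */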
bool
agx_nir_lower_bindings(nir_shader *shader, bool *uses_bindless_samplers)
{
   /* First lower index to offset so we can lower more naturally */
   bool progress = nir_lower_tex(
      shader, &(nir_lower_tex_options){.lower_index_to_offset = true});

   /* Next run constant folding so the constant optimizations above have a
    * chance.
    */
   progress |= nir_opt_constant_folding(shader);

   progress |= nir_shader_instructions_pass(
      shader, lower, nir_metadata_control_flow, uses_bindless_samplers);
   return progress;
}