/*
 * Copyright 2023 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#include "asahi/compiler/agx_nir.h"
#include "compiler/nir/nir_builder.h"
#include "util/bitset.h"
#include "agx_state.h"
#include "nir.h"
#include "nir_builder_opcodes.h"
#include "nir_intrinsics.h"
#include "nir_intrinsics_indices.h"

/*
 * Lower binding table textures and images to texture state registers and (if
 * necessary) bindless access into an internal table mapped like additional
 * texture state registers. The following layout is used:
 *
 *    1. Textures
 *    2. Images (read/write interleaved)
 */
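/*
 * Concretely, if a shader uses N texture state slots, image i's texture
 * (read/query) descriptor lands at slot N + 2*i and its PBE (write/atomic)
 * descriptor at slot N + 2*i + 1, matching the remapping done below.
 */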

static bool
lower_sampler(nir_builder *b, nir_tex_instr *tex)
{
   if (!nir_tex_instr_need_sampler(tex))
      return false;

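   /* Use the dynamic sampler offset when present; otherwise fall back to the
    * constant sampler index on the instruction.
    */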
   nir_def *index = nir_steal_tex_src(tex, nir_tex_src_sampler_offset);
   if (!index)
      index = nir_imm_int(b, tex->sampler_index);

   nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle,
                         nir_load_sampler_handle_agx(b, index));
   return true;
}

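/*
 * Lower a single image intrinsic or texture instruction: use a texture state
 * register when the index is statically known to fit, otherwise fall back to
 * a bindless handle into the internal table described above.
 */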
static bool
lower(nir_builder *b, nir_instr *instr, void *data)
{
   bool *uses_bindless_samplers = data;
   bool progress = false;
   bool force_bindless = agx_nir_needs_texture_crawl(instr);
   b->cursor = nir_before_instr(instr);

   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      nir_intrinsic_op bindless_op;

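      /* Map each handled image intrinsic to its bindless counterpart, e.g.
       * image_load -> bindless_image_load.
       */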
#define CASE(op)                                                               \
   case nir_intrinsic_##op:                                                    \
      bindless_op = nir_intrinsic_bindless_##op;                               \
      break;

      switch (intr->intrinsic) {
         CASE(image_load)
         CASE(image_store)
         CASE(image_size)
         CASE(image_samples)
         CASE(image_atomic)
         CASE(image_atomic_swap)
      default:
         return false;
      }
#undef CASE

      nir_def *index = intr->src[0].ssa;
      nir_scalar index_scalar = nir_scalar_resolved(index, 0);

      /* Remap according to the driver layout */
      unsigned offset = BITSET_LAST_BIT(b->shader->info.textures_used);

      /* For reads and queries, we use the texture descriptor which is first.
       * Writes and atomics use the PBE descriptor.
       */
      if (intr->intrinsic != nir_intrinsic_image_load &&
          intr->intrinsic != nir_intrinsic_image_size &&
          intr->intrinsic != nir_intrinsic_image_samples)
         offset++;

      /* If we can determine statically that the image fits in texture state
       * registers, avoid lowering to bindless access.
       */
      if (nir_scalar_is_const(index_scalar) && !force_bindless) {
         unsigned idx = (nir_scalar_as_uint(index_scalar) * 2) + offset;

         if (idx < AGX_NUM_TEXTURE_STATE_REGS) {
            nir_src_rewrite(&intr->src[0], nir_imm_intN_t(b, idx, 16));
            return true;
         }
      }

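      /* The atomic op is stored in a const index whose slot depends on the
       * opcode, so read it with the original opcode and reapply it after
       * switching to the bindless form.
       */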
      nir_atomic_op op = nir_atomic_op_iadd /* irrelevant */;
      if (nir_intrinsic_has_atomic_op(intr))
         op = nir_intrinsic_atomic_op(intr);

      /* Otherwise, lower to bindless */
      intr->intrinsic = bindless_op;

      if (nir_intrinsic_has_atomic_op(intr))
         nir_intrinsic_set_atomic_op(intr, op);

      /* The driver uploads enough null texture/PBE descriptors for robustness
       * given the shader limit, but we still need to clamp since we're
       * lowering to bindless so the hardware doesn't know the limit.
       *
       * The GL spec says out-of-bounds image indexing is undefined, but
       * faulting is not acceptable for robustness.
       */
      index = nir_umin(
         b, index,
         nir_imm_intN_t(b, b->shader->info.num_images - 1, index->bit_size));

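      /* Each image occupies two consecutive handles (texture descriptor, then
       * PBE descriptor) placed after the textures, hence 2 * index + offset.
       */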
      index = nir_iadd_imm(b, nir_imul_imm(b, index, 2), offset);
      nir_src_rewrite(&intr->src[0], nir_load_texture_handle_agx(b, index));
   } else if (instr->type == nir_instr_type_tex) {
      nir_tex_instr *tex = nir_instr_as_tex(instr);

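      /* With more than 16 samplers in play, any sampler that is indirectly
       * indexed or lies past the first 16 sampler state registers is demoted
       * to a bindless sampler.
       */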
      if (((BITSET_COUNT(b->shader->info.samplers_used) > 16) &&
           (nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset) >= 0 ||
            tex->sampler_index >= 16)) &&
          lower_sampler(b, tex)) {
         progress = true;
         *uses_bindless_samplers = true;
      }

      /* Nothing to do for "real" bindless */
      if (nir_tex_instr_src_index(tex, nir_tex_src_texture_handle) >= 0)
         return progress;

      /* Textures are mapped 1:1, so if we can prove it fits in a texture state
       * register, use the texture state register.
       */
      if (tex->texture_index < AGX_NUM_TEXTURE_STATE_REGS &&
          nir_tex_instr_src_index(tex, nir_tex_src_texture_offset) == -1 &&
          !force_bindless)
         return progress;

      /* Otherwise, lower to bindless. Could be optimized. */
      nir_def *index = nir_steal_tex_src(tex, nir_tex_src_texture_offset);
      if (!index)
         index = nir_imm_int(b, tex->texture_index);

      /* As above */
      index = nir_umin(
         b, index,
         nir_imm_intN_t(b, b->shader->info.num_textures - 1, index->bit_size));

      nir_tex_instr_add_src(tex, nir_tex_src_texture_handle,
                            nir_load_texture_handle_agx(b, index));
   } else {
      /* Other instruction types never touch bindings */
      return false;
   }

   return true;
}

bool
agx_nir_lower_bindings(nir_shader *shader, bool *uses_bindless_samplers)
{
   /* First lower index to offset so we can lower more naturally */
   bool progress = nir_lower_tex(
      shader, &(nir_lower_tex_options){.lower_index_to_offset = true});

   /* Next, run constant folding so the constant-index fast paths in lower()
    * above have a chance to trigger.
    */
   progress |= nir_opt_constant_folding(shader);

   progress |= nir_shader_instructions_pass(
      shader, lower, nir_metadata_control_flow, uses_bindless_samplers);
   return progress;
}