1 /*
2 * Copyright © 2022 Advanced Micro Devices, Inc.
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 /* Implement query_size, query_levels, and query_samples by extracting the information from
8 * descriptors. This is expected to be faster than image_resinfo.
9 */
10
11 #include "ac_nir.h"
12 #include "nir_builder.h"
13 #include "amdgfxregs.h"
14
/* Extract one bitfield from a descriptor dword.
 *
 * \param b      builder used to emit the instructions
 * \param desc   descriptor vector
 * \param index  which component (dword) of the descriptor to read
 * \param mask   bits of the field within that dword; the lowest set bit gives the field
 *               offset and the number of set bits gives the field width, so the mask is
 *               expected to be one contiguous run of bits
 * \return the unsigned field value
 */
static nir_def *get_field(nir_builder *b, nir_def *desc, unsigned index, unsigned mask)
{
   const unsigned first_bit = ffs(mask) - 1;
   const unsigned num_bits = util_bitcount(mask);
   nir_def *dword = nir_channel(b, desc, index);

   return nir_ubfe_imm(b, dword, first_bit, num_bits);
}
19
/* Make a query result read as 0 for a null descriptor.
 *
 * A descriptor is treated as null here when its dword 1 is zero.
 *
 * \param b      builder used to emit the instructions
 * \param desc   descriptor vector the value was derived from
 * \param value  query result computed from the descriptor
 * \return 0 if dword 1 of the descriptor is zero, otherwise \p value
 */
static nir_def *handle_null_desc(nir_builder *b, nir_def *desc, nir_def *value)
{
   nir_def *dword1 = nir_channel(b, desc, 1);
   nir_def *desc_is_null = nir_ieq_imm(b, dword1, 0);
   nir_def *zero = nir_imm_int(b, 0);

   return nir_bcsel(b, desc_is_null, zero, value);
}
25
/* Compute the sample count of a resource from its descriptor.
 *
 * Only GLSL_SAMPLER_DIM_MS reads the descriptor; every other dimension reports 1.
 * The result goes through handle_null_desc() in both cases.
 */
static nir_def *query_samples(nir_builder *b, nir_def *desc, enum glsl_sampler_dim dim)
{
   if (dim != GLSL_SAMPLER_DIM_MS)
      return handle_null_desc(b, desc, nir_imm_int(b, 1));

   /* For MSAA resources the LAST_LEVEL field holds log2(num_samples). */
   nir_def *log2_samples = get_field(b, desc, 3, ~C_00A00C_LAST_LEVEL);
   nir_def *one = nir_imm_int(b, 1);
   nir_def *num_samples = nir_ishl(b, one, log2_samples);

   return handle_null_desc(b, desc, num_samples);
}
40
/* Compute the number of mip levels as LAST_LEVEL - BASE_LEVEL + 1, both read from dword 3
 * of the descriptor. The result goes through handle_null_desc().
 */
static nir_def *query_levels(nir_builder *b, nir_def *desc)
{
   nir_def *first = get_field(b, desc, 3, ~C_00A00C_BASE_LEVEL);
   nir_def *last = get_field(b, desc, 3, ~C_00A00C_LAST_LEVEL);

   /* The range is inclusive, hence the +1. */
   nir_def *range = nir_isub(b, last, first);
   nir_def *num_levels = nir_iadd_imm(b, range, 1);

   return handle_null_desc(b, desc, num_levels);
}
50
51 static nir_def *
lower_query_size(nir_builder * b,nir_def * desc,nir_src * lod,enum glsl_sampler_dim dim,bool is_array,enum amd_gfx_level gfx_level)52 lower_query_size(nir_builder *b, nir_def *desc, nir_src *lod,
53 enum glsl_sampler_dim dim, bool is_array, enum amd_gfx_level gfx_level)
54 {
55 if (dim == GLSL_SAMPLER_DIM_BUF) {
56 nir_def *size = nir_channel(b, desc, 2);
57
58 if (gfx_level == GFX8) {
59 /* On GFX8, the descriptor contains the size in bytes,
60 * but TXQ must return the size in elements.
61 * The stride is always non-zero for resources using TXQ.
62 * Divide the size by the stride.
63 */
64 size = nir_udiv(b, size, get_field(b, desc, 1, ~C_008F04_STRIDE));
65 }
66 return size;
67 }
68
69 /* Cube textures return (height, height) instead of (width, height) because it's fewer
70 * instructions.
71 */
72 bool has_width = dim != GLSL_SAMPLER_DIM_CUBE;
73 bool has_height = dim != GLSL_SAMPLER_DIM_1D;
74 bool has_depth = dim == GLSL_SAMPLER_DIM_3D;
75 nir_def *width = NULL, *height = NULL, *layers = NULL, *base_array = NULL;
76 nir_def *last_array = NULL, *depth = NULL;
77
78 /* Get the width, height, depth, layers. */
79 if (gfx_level >= GFX10) {
80 if (has_width) {
81 nir_def *width_lo = get_field(b, desc, 1, ~C_00A004_WIDTH_LO);
82 nir_def *width_hi = get_field(b, desc, 2, ~C_00A008_WIDTH_HI);
83 /* Use iadd to get s_lshl2_add_u32 in the end. */
84 width = nir_iadd(b, width_lo, nir_ishl_imm(b, width_hi, 2));
85 }
86 if (has_height)
87 height = get_field(b, desc, 2, ~C_00A008_HEIGHT);
88 if (has_depth)
89 depth = get_field(b, desc, 4, ~C_00A010_DEPTH);
90
91 if (is_array) {
92 last_array = get_field(b, desc, 4, ~C_00A010_DEPTH);
93 base_array = get_field(b, desc, 4, ~C_00A010_BASE_ARRAY);
94 }
95 } else {
96 if (has_width)
97 width = get_field(b, desc, 2, ~C_008F18_WIDTH);
98 if (has_height)
99 height = get_field(b, desc, 2, ~C_008F18_HEIGHT);
100 if (has_depth)
101 depth = get_field(b, desc, 4, ~C_008F20_DEPTH);
102
103 if (is_array) {
104 base_array = get_field(b, desc, 5, ~C_008F24_BASE_ARRAY);
105
106 if (gfx_level == GFX9) {
107 last_array = get_field(b, desc, 4, ~C_008F20_DEPTH);
108 } else {
109 last_array = get_field(b, desc, 5, ~C_008F24_LAST_ARRAY);
110 }
111 }
112 }
113
114 /* On GFX10.3+, DEPTH contains the pitch if the type is 1D, 2D, or 2D_MSAA. We only program
115 * the pitch for 2D. We need to set depth and last_array to 0 in that case.
116 */
117 if (gfx_level >= GFX10_3 && (has_depth || is_array)) {
118 nir_def *type = get_field(b, desc, 3, ~C_00A00C_TYPE);
119 nir_def *is_2d = nir_ieq_imm(b, type, V_008F1C_SQ_RSRC_IMG_2D);
120
121 if (has_depth)
122 depth = nir_bcsel(b, is_2d, nir_imm_int(b, 0), depth);
123 if (is_array)
124 last_array = nir_bcsel(b, is_2d, nir_imm_int(b, 0), last_array);
125 }
126
127 /* All values are off by 1. */
128 if (has_width)
129 width = nir_iadd_imm(b, width, 1);
130 if (has_height)
131 height = nir_iadd_imm(b, height, 1);
132 if (has_depth)
133 depth = nir_iadd_imm(b, depth, 1);
134
135 if (is_array) {
136 layers = nir_isub(b, last_array, base_array);
137 layers = nir_iadd_imm(b, layers, 1);
138 }
139
140 /* Minify the dimensions according to base_level + lod. */
141 if (dim != GLSL_SAMPLER_DIM_MS && dim != GLSL_SAMPLER_DIM_RECT) {
142 nir_def *base_level = get_field(b, desc, 3, ~C_00A00C_BASE_LEVEL);
143 nir_def *level = lod ? nir_iadd(b, base_level, lod->ssa) : base_level;
144
145 if (has_width)
146 width = nir_ushr(b, width, level);
147 if (has_height)
148 height = nir_ushr(b, height, level);
149 if (has_depth)
150 depth = nir_ushr(b, depth, level);
151
152 /* 1D and square texture can't have 0 size unless the lod is out-of-bounds, which is
153 * undefined. Only non-square targets can have one of the sizes 0 with an in-bounds lod
154 * after minification.
155 */
156 if (has_width && has_height) {
157 if (has_width)
158 width = nir_umax(b, width, nir_imm_int(b, 1));
159 if (has_height)
160 height = nir_umax(b, height, nir_imm_int(b, 1));
161 if (has_depth)
162 depth = nir_umax(b, depth, nir_imm_int(b, 1));
163 }
164 }
165
166 /* Special case for sliced storage 3D views which shouldn't be minified. */
167 if (gfx_level >= GFX10 && has_depth) {
168 nir_def *uav3d =
169 nir_ieq_imm(b, get_field(b, desc, 5, ~C_00A014_ARRAY_PITCH), 1);
170 nir_def *layers_3d =
171 nir_isub(b, get_field(b, desc, 4, ~C_00A010_DEPTH),
172 get_field(b, desc, 4, ~C_00A010_BASE_ARRAY));
173 layers_3d = nir_iadd_imm(b, layers_3d, 1);
174 depth = nir_bcsel(b, uav3d, layers_3d, depth);
175 }
176
177 nir_def *result = NULL;
178
179 /* Construct the result. */
180 switch (dim) {
181 case GLSL_SAMPLER_DIM_1D:
182 result = is_array ? nir_vec2(b, width, layers) : width;
183 break;
184 case GLSL_SAMPLER_DIM_CUBE:
185 result = is_array ? nir_vec3(b, height, height, layers) : nir_vec2(b, height, height);
186 break;
187 case GLSL_SAMPLER_DIM_2D:
188 case GLSL_SAMPLER_DIM_MS:
189 case GLSL_SAMPLER_DIM_RECT:
190 case GLSL_SAMPLER_DIM_EXTERNAL:
191 result = is_array ? nir_vec3(b, width, height, layers) : nir_vec2(b, width, height);
192 break;
193 case GLSL_SAMPLER_DIM_3D:
194 result = nir_vec3(b, width, height, depth);
195 break;
196 default:
197 unreachable("invalid sampler dim");
198 }
199
200 return handle_null_desc(b, desc, result);
201 }
202
/* Per-instruction callback: replace image/texture size, level-count and sample-count queries
 * with code that reads the answer from the resource descriptor.
 *
 * \param b      builder; its cursor is moved in front of \p instr
 * \param instr  instruction to inspect
 * \param data   pointer to the enum amd_gfx_level to lower for
 * \return true if \p instr was replaced and removed, false if it was left alone
 */
static bool lower_resinfo(nir_builder *b, nir_instr *instr, void *data)
{
   const enum amd_gfx_level gfx_level = *(enum amd_gfx_level *)data;
   nir_def *old_def = NULL;
   nir_def *new_def = NULL;

   switch (instr->type) {
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      enum glsl_sampler_dim dim;
      bool is_array;
      nir_def *desc = NULL;

      old_def = &intr->def;
      b->cursor = nir_before_instr(instr);

      /* Load the descriptor with the variant matching how the image is referenced.
       * Buffers request 4 dwords, everything else 8.
       */
      switch (intr->intrinsic) {
      case nir_intrinsic_image_size:
      case nir_intrinsic_image_samples:
         dim = nir_intrinsic_image_dim(intr);
         is_array = nir_intrinsic_image_array(intr);
         desc = nir_image_descriptor_amd(b, dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8,
                                         32, intr->src[0].ssa);
         break;

      case nir_intrinsic_image_deref_size:
      case nir_intrinsic_image_deref_samples: {
         /* Deref intrinsics take dim/array-ness from the type of the deref. */
         const struct glsl_type *type =
            nir_instr_as_deref(intr->src[0].ssa->parent_instr)->type;

         dim = glsl_get_sampler_dim(type);
         is_array = glsl_sampler_type_is_array(type);
         desc = nir_image_deref_descriptor_amd(b, dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8,
                                               32, intr->src[0].ssa);
         break;
      }

      case nir_intrinsic_bindless_image_size:
      case nir_intrinsic_bindless_image_samples:
         dim = nir_intrinsic_image_dim(intr);
         is_array = nir_intrinsic_image_array(intr);
         desc = nir_bindless_image_descriptor_amd(b, dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8,
                                                  32, intr->src[0].ssa);
         break;

      default:
         /* Not a query handled by this pass. */
         return false;
      }

      /* Only the *_samples and *_size intrinsics get past the switch above. */
      switch (intr->intrinsic) {
      case nir_intrinsic_image_samples:
      case nir_intrinsic_image_deref_samples:
      case nir_intrinsic_bindless_image_samples:
         new_def = query_samples(b, desc, dim);
         break;

      default:
         /* Image size queries are lowered without a LOD. */
         new_def = lower_query_size(b, desc, NULL, dim, is_array, gfx_level);
         break;
      }
      break;
   }

   case nir_instr_type_tex: {
      nir_tex_instr *tex = nir_instr_as_tex(instr);
      nir_def *desc = NULL;
      nir_src *lod = NULL;

      old_def = &tex->def;
      b->cursor = nir_before_instr(instr);

      if (tex->op != nir_texop_txs &&
          tex->op != nir_texop_query_levels &&
          tex->op != nir_texop_texture_samples)
         return false;

      /* Walk the sources: remember the LOD, and emit a descriptor load for the texture
       * deref/handle.
       * NOTE(review): if the instruction carries neither a texture deref nor a texture
       * handle source, desc stays NULL when handed to the query helpers - presumably
       * callers guarantee one is present; confirm.
       */
      for (unsigned i = 0; i < tex->num_srcs; i++) {
         if (tex->src[i].src_type == nir_tex_src_lod) {
            lod = &tex->src[i].src;
            continue;
         }

         if (tex->src[i].src_type != nir_tex_src_texture_deref &&
             tex->src[i].src_type != nir_tex_src_texture_handle)
            continue;

         /* Single-source tex instruction that returns the descriptor of the same texture. */
         nir_tex_instr *desc_tex = nir_tex_instr_create(b->shader, 1);

         desc_tex->op = nir_texop_descriptor_amd;
         desc_tex->sampler_dim = tex->sampler_dim;
         desc_tex->is_array = tex->is_array;
         desc_tex->texture_index = tex->texture_index;
         desc_tex->sampler_index = tex->sampler_index;
         desc_tex->dest_type = nir_type_int32;
         desc_tex->src[0].src = nir_src_for_ssa(tex->src[i].src.ssa);
         desc_tex->src[0].src_type = tex->src[i].src_type;
         nir_def_init(&desc_tex->instr, &desc_tex->def,
                      nir_tex_instr_dest_size(desc_tex), 32);
         nir_builder_instr_insert(b, &desc_tex->instr);
         desc = &desc_tex->def;
      }

      switch (tex->op) {
      case nir_texop_txs:
         new_def = lower_query_size(b, desc, lod, tex->sampler_dim, tex->is_array,
                                    gfx_level);
         break;
      case nir_texop_query_levels:
         new_def = query_levels(b, desc);
         break;
      case nir_texop_texture_samples:
         new_def = query_samples(b, desc, tex->sampler_dim);
         break;
      default:
         unreachable("shouldn't get here");
      }
      break;
   }

   default:
      /* Other instruction types are never lowered. */
      break;
   }

   if (!new_def)
      return false;

   /* Redirect the uses of the query result to the lowered value and drop the query. */
   nir_def_rewrite_uses_after(old_def, new_def, instr);
   nir_instr_remove(instr);
   return true;
}
333
/* Run lower_resinfo() on every instruction of the shader.
 *
 * \param nir        shader to process
 * \param gfx_level  handed to lower_resinfo() by address as its callback data
 * \return the result of nir_shader_instructions_pass()
 */
bool ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level)
{
   /* Dominance and block-index metadata are passed as the pass's metadata argument. */
   bool progress =
      nir_shader_instructions_pass(nir, lower_resinfo,
                                   nir_metadata_dominance | nir_metadata_block_index,
                                   &gfx_level);

   return progress;
}
341