1 /*
2 * Copyright 2022 Advanced Micro Devices, Inc.
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 /* This lowers image and texture opcodes to typed buffer opcodes (equivalent to image buffers)
8 * for some CDNA chips. Sampler buffers and image buffers are not lowered.
9 *
10 * Only the subset of opcodes and states that is used by VAAPI and OpenMAX is lowered.
11 * That means CLAMP_TO_EDGE is always used. Only level 0 can be accessed. The minification
12 * and magnification filter settings are assumed to be equal.
13 *
14 * This uses a custom image descriptor that is used in conjunction with this pass. The first
15 * 4 dwords of the descriptor contain the buffer descriptor where the format matches the image
16 * format and the stride matches the pixel size, and the last 4 dwords contain parameters
17 * for manual address computations and bounds checking like the pitch, the number of elements
18 * per slice, etc.
19 *
20 */
21
22 #include "ac_nir.h"
23 #include "nir_builder.h"
24 #include "amdgfxregs.h"
25
/* Extract a bitfield from one dword of the descriptor.
 *
 * "index" selects the descriptor channel and "mask" selects the bits: the field starts
 * at the lowest set bit of the mask and is as wide as the number of set bits
 * (so a contiguous mask is presumably expected).
 */
static nir_def *get_field(nir_builder *b, nir_def *desc, unsigned index, unsigned mask)
{
   const unsigned first_bit = ffs(mask) - 1;
   const unsigned num_bits = util_bitcount(mask);
   nir_def *dword = nir_channel(b, desc, index);

   return nir_ubfe_imm(b, dword, first_bit, num_bits);
}
30
get_coord_components(enum glsl_sampler_dim dim,bool is_array)31 static unsigned get_coord_components(enum glsl_sampler_dim dim, bool is_array)
32 {
33 switch (dim) {
34 case GLSL_SAMPLER_DIM_1D:
35 return is_array ? 2 : 1;
36 case GLSL_SAMPLER_DIM_2D:
37 case GLSL_SAMPLER_DIM_RECT:
38 return is_array ? 3 : 2;
39 case GLSL_SAMPLER_DIM_3D:
40 return 3;
41 default:
42 unreachable("unexpected sampler type");
43 }
44 }
45
46 /* Lower image coordinates to a buffer element index. Return UINT_MAX if the image coordinates
47 * are out of bounds.
48 */
lower_image_coords(nir_builder * b,nir_def * desc,nir_def * coord,enum glsl_sampler_dim dim,bool is_array,bool handle_out_of_bounds)49 static nir_def *lower_image_coords(nir_builder *b, nir_def *desc, nir_def *coord,
50 enum glsl_sampler_dim dim, bool is_array,
51 bool handle_out_of_bounds)
52 {
53 unsigned num_coord_components = get_coord_components(dim, is_array);
54 nir_def *zero = nir_imm_int(b, 0);
55
56 /* Get coordinates. */
57 nir_def *x = nir_channel(b, coord, 0);
58 nir_def *y = num_coord_components >= 2 ? nir_channel(b, coord, 1) : NULL;
59 nir_def *z = num_coord_components >= 3 ? nir_channel(b, coord, 2) : NULL;
60
61 if (dim == GLSL_SAMPLER_DIM_1D && is_array) {
62 z = y;
63 y = NULL;
64 }
65
66 if (is_array) {
67 nir_def *first_layer = get_field(b, desc, 5, 0xffff0000);
68 z = nir_iadd(b, z, first_layer);
69 }
70
71 /* Compute the buffer element index. */
72 nir_def *index = x;
73 if (y) {
74 nir_def *pitch = nir_channel(b, desc, 6);
75 index = nir_iadd(b, index, nir_imul(b, pitch, y));
76 }
77 if (z) {
78 nir_def *slice_elements = nir_channel(b, desc, 7);
79 index = nir_iadd(b, index, nir_imul(b, slice_elements, z));
80 }
81
82 /* Determine whether the coordinates are out of bounds. */
83 nir_def *out_of_bounds = NULL;
84
85 if (handle_out_of_bounds) {
86 nir_def *width = get_field(b, desc, 4, 0xffff);
87 out_of_bounds = nir_ior(b, nir_ilt(b, x, zero), nir_ige(b, x, width));
88
89 if (y) {
90 nir_def *height = get_field(b, desc, 4, 0xffff0000);
91 out_of_bounds = nir_ior(b, out_of_bounds,
92 nir_ior(b, nir_ilt(b, y, zero), nir_ige(b, y, height)));
93 }
94 if (z) {
95 nir_def *depth = get_field(b, desc, 5, 0xffff);
96 out_of_bounds = nir_ior(b, out_of_bounds,
97 nir_ior(b, nir_ilt(b, z, zero), nir_ige(b, z, depth)));
98 }
99
100 /* Make the buffer opcode out of bounds by setting UINT_MAX. */
101 index = nir_bcsel(b, out_of_bounds, nir_imm_int(b, UINT_MAX), index);
102 }
103
104 return index;
105 }
106
/* Emit a typed buffer load that stands in for an image load.
 *
 * The first 4 dwords of "desc" are used as the buffer descriptor, both byte offsets are
 * zero, and the element index is computed from "coord" by lower_image_coords().
 * Returns the loaded value (num_components x bit_size).
 */
static nir_def *emulated_image_load(nir_builder *b, unsigned num_components, unsigned bit_size,
                                    nir_def *desc, nir_def *coord,
                                    enum gl_access_qualifier access, enum glsl_sampler_dim dim,
                                    bool is_array, bool handle_out_of_bounds)
{
   nir_def *no_offset = nir_imm_int(b, 0);
   nir_def *buf_desc = nir_channels(b, desc, 0xf);
   nir_def *elem_index =
      lower_image_coords(b, desc, coord, dim, is_array, handle_out_of_bounds);

   return nir_load_buffer_amd(b, num_components, bit_size, buf_desc,
                              no_offset, no_offset, elem_index,
                              .base = 0,
                              .memory_modes = nir_var_image,
                              .access = access | ACCESS_USES_FORMAT_AMD);
}
122
/* Emit a typed buffer store that stands in for an image store.
 *
 * The first 4 dwords of "desc" are used as the buffer descriptor and the element index is
 * computed from "coord" by lower_image_coords() with out-of-bounds handling always enabled.
 */
static void emulated_image_store(nir_builder *b, nir_def *desc, nir_def *coord,
                                 nir_def *data, enum gl_access_qualifier access,
                                 enum glsl_sampler_dim dim, bool is_array)
{
   nir_def *no_offset = nir_imm_int(b, 0);
   nir_def *buf_desc = nir_channels(b, desc, 0xf);
   nir_def *elem_index = lower_image_coords(b, desc, coord, dim, is_array, true);

   nir_store_buffer_amd(b, data, buf_desc, no_offset, no_offset, elem_index,
                        .base = 0,
                        .memory_modes = nir_var_image,
                        .access = access | ACCESS_USES_FORMAT_AMD);
}
135
136 /* Return the width, height, or depth for dim=0,1,2. */
get_dim(nir_builder * b,nir_def * desc,unsigned dim)137 static nir_def *get_dim(nir_builder *b, nir_def *desc, unsigned dim)
138 {
139 return get_field(b, desc, 4 + dim / 2, 0xffff << (16 * (dim % 2)));
140 }
141
142 /* Lower txl with lod=0 to typed buffer loads. This is based on the equations in the GL spec.
143 * This basically converts the tex opcode into 1 or more image_load opcodes.
144 */
emulated_tex_level_zero(nir_builder * b,unsigned num_components,unsigned bit_size,nir_def * desc,nir_def * sampler_desc,nir_def * coord_vec,enum glsl_sampler_dim sampler_dim,bool is_array)145 static nir_def *emulated_tex_level_zero(nir_builder *b, unsigned num_components,
146 unsigned bit_size, nir_def *desc,
147 nir_def *sampler_desc, nir_def *coord_vec,
148 enum glsl_sampler_dim sampler_dim, bool is_array)
149 {
150 const enum gl_access_qualifier access =
151 ACCESS_RESTRICT | ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER;
152 const unsigned num_coord_components = get_coord_components(sampler_dim, is_array);
153 const unsigned num_dim_coords = num_coord_components - is_array;
154 const unsigned array_comp = num_coord_components - 1;
155
156 nir_def *zero = nir_imm_int(b, 0);
157 nir_def *fp_one = nir_imm_floatN_t(b, 1, bit_size);
158 nir_def *coord[3] = {0};
159
160 assert(num_coord_components <= 3);
161 for (unsigned i = 0; i < num_coord_components; i++)
162 coord[i] = nir_channel(b, coord_vec, i);
163
164 /* Convert to unnormalized coordinates. */
165 if (sampler_dim != GLSL_SAMPLER_DIM_RECT) {
166 for (unsigned dim = 0; dim < num_dim_coords; dim++)
167 coord[dim] = nir_fmul(b, coord[dim], nir_u2f32(b, get_dim(b, desc, dim)));
168 }
169
170 /* The layer index is handled differently and ignores the filter and wrap mode. */
171 if (is_array) {
172 coord[array_comp] = nir_f2i32(b, nir_fround_even(b, coord[array_comp]));
173 coord[array_comp] = nir_iclamp(b, coord[array_comp], zero,
174 nir_iadd_imm(b, get_dim(b, desc, 2), -1));
175 }
176
177 /* Determine the filter by reading the first bit of the XY_MAG_FILTER field,
178 * which is 1 for linear, 0 for nearest.
179 *
180 * We assume that XY_MIN_FILTER and Z_FILTER are identical.
181 */
182 nir_def *is_nearest =
183 nir_ieq_imm(b, nir_iand_imm(b, nir_channel(b, sampler_desc, 2), 1 << 20), 0);
184 nir_def *result_nearest, *result_linear;
185
186 nir_if *if_nearest = nir_push_if(b, is_nearest);
187 {
188 /* Nearest filter. */
189 nir_def *coord0[3] = {0};
190 memcpy(coord0, coord, sizeof(coord));
191
192 for (unsigned dim = 0; dim < num_dim_coords; dim++) {
193 /* Convert to integer coordinates. (floor is required) */
194 coord0[dim] = nir_f2i32(b, nir_ffloor(b, coord0[dim]));
195
196 /* Apply the wrap mode. We assume it's always CLAMP_TO_EDGE, so clamp. */
197 coord0[dim] = nir_iclamp(b, coord0[dim], zero, nir_iadd_imm(b, get_dim(b, desc, dim), -1));
198 }
199
200 /* Load the texel. */
201 result_nearest = emulated_image_load(b, num_components, bit_size, desc,
202 nir_vec(b, coord0, num_coord_components),
203 access, sampler_dim, is_array, false);
204 }
205 nir_push_else(b, if_nearest);
206 {
207 /* Linear filter. */
208 nir_def *coord0[3] = {0};
209 nir_def *coord1[3] = {0};
210 nir_def *weight[3] = {0};
211
212 memcpy(coord0, coord, sizeof(coord));
213
214 for (unsigned dim = 0; dim < num_dim_coords; dim++) {
215 /* First subtract 0.5. */
216 coord0[dim] = nir_fadd_imm(b, coord0[dim], -0.5);
217
218 /* Use fract to compute the filter weights. (FP16 results will get FP16 filter precision) */
219 weight[dim] = nir_f2fN(b, nir_ffract(b, coord0[dim]), bit_size);
220
221 /* Floor to get the top-left texel of the filter. */
222 /* Add 1 to get the bottom-right texel. */
223 coord0[dim] = nir_f2i32(b, nir_ffloor(b, coord0[dim]));
224 coord1[dim] = nir_iadd_imm(b, coord0[dim], 1);
225
226 /* Apply the wrap mode. We assume it's always CLAMP_TO_EDGE, so clamp. */
227 coord0[dim] = nir_iclamp(b, coord0[dim], zero, nir_iadd_imm(b, get_dim(b, desc, dim), -1));
228 coord1[dim] = nir_iclamp(b, coord1[dim], zero, nir_iadd_imm(b, get_dim(b, desc, dim), -1));
229 }
230
231 /* Load all texels for the linear filter.
232 * This is 2 texels for 1D, 4 texels for 2D, and 8 texels for 3D.
233 */
234 nir_def *texel[8];
235
236 for (unsigned i = 0; i < (1 << num_dim_coords); i++) {
237 nir_def *texel_coord[3];
238
239 /* Determine whether the current texel should use channels from coord0
240 * or coord1. The i-th bit of the texel index determines that.
241 */
242 for (unsigned dim = 0; dim < num_dim_coords; dim++)
243 texel_coord[dim] = (i >> dim) & 0x1 ? coord1[dim] : coord0[dim];
244
245 /* Add the layer index, which doesn't change between texels. */
246 if (is_array)
247 texel_coord[array_comp] = coord0[array_comp];
248
249 /* Compute how much the texel contributes to the final result. */
250 nir_def *texel_weight = fp_one;
251 for (unsigned dim = 0; dim < num_dim_coords; dim++) {
252 /* Let's see what "i" represents:
253 * Texel i=0 = 000
254 * Texel i=1 = 001
255 * Texel i=2 = 010 (2D & 3D only)
256 * Texel i=3 = 011 (2D & 3D only)
257 * Texel i=4 = 100 (3D only)
258 * Texel i=5 = 101 (3D only)
259 * Texel i=6 = 110 (3D only)
260 * Texel i=7 = 111 (3D only)
261 *
262 * The rightmost bit (LSB) represents the X direction, the middle bit represents
263 * the Y direction, and the leftmost bit (MSB) represents the Z direction.
264 * If we shift the texel index "i" by the dimension "dim", we'll get whether that
265 * texel value should be multiplied by (1 - weight[dim]) or (weight[dim]).
266 */
267 texel_weight = nir_fmul(b, texel_weight,
268 (i >> dim) & 0x1 ? weight[dim] :
269 nir_fadd(b, fp_one, nir_fneg(b, weight[dim])));
270 }
271
272 /* Load the linear filter texel. */
273 texel[i] = emulated_image_load(b, num_components, bit_size, desc,
274 nir_vec(b, texel_coord, num_coord_components),
275 access, sampler_dim, is_array, false);
276
277 /* Multiply the texel by the weight. */
278 texel[i] = nir_fmul(b, texel[i], texel_weight);
279 }
280
281 /* Sum up all weighted texels to get the final result of linear filtering. */
282 result_linear = zero;
283 for (unsigned i = 0; i < (1 << num_dim_coords); i++)
284 result_linear = nir_fadd(b, result_linear, texel[i]);
285 }
286 nir_pop_if(b, if_nearest);
287
288 return nir_if_phi(b, result_nearest, result_linear);
289 }
290
lower_image_opcodes(nir_builder * b,nir_instr * instr,void * data)291 static bool lower_image_opcodes(nir_builder *b, nir_instr *instr, void *data)
292 {
293 if (instr->type == nir_instr_type_intrinsic) {
294 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
295 nir_deref_instr *deref;
296 enum gl_access_qualifier access;
297 enum glsl_sampler_dim dim;
298 bool is_array;
299 nir_def *desc = NULL, *result = NULL;
300 ASSERTED const char *intr_name;
301
302 nir_def *dst = &intr->def;
303 b->cursor = nir_before_instr(instr);
304
305 switch (intr->intrinsic) {
306 case nir_intrinsic_image_load:
307 case nir_intrinsic_image_store:
308 access = nir_intrinsic_access(intr);
309 dim = nir_intrinsic_image_dim(intr);
310 if (dim == GLSL_SAMPLER_DIM_BUF)
311 return false;
312 is_array = nir_intrinsic_image_array(intr);
313 desc = nir_image_descriptor_amd(b, dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8,
314 32, intr->src[0].ssa);
315 break;
316
317 case nir_intrinsic_image_deref_load:
318 case nir_intrinsic_image_deref_store:
319 deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);
320 access = nir_deref_instr_get_variable(deref)->data.access;
321 dim = glsl_get_sampler_dim(deref->type);
322 if (dim == GLSL_SAMPLER_DIM_BUF)
323 return false;
324 is_array = glsl_sampler_type_is_array(deref->type);
325 desc = nir_image_deref_descriptor_amd(b, dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8,
326 32, intr->src[0].ssa);
327 break;
328
329 case nir_intrinsic_bindless_image_load:
330 case nir_intrinsic_bindless_image_store:
331 access = nir_intrinsic_access(intr);
332 dim = nir_intrinsic_image_dim(intr);
333 if (dim == GLSL_SAMPLER_DIM_BUF)
334 return false;
335 is_array = nir_intrinsic_image_array(intr);
336 desc = nir_bindless_image_descriptor_amd(b, dim == GLSL_SAMPLER_DIM_BUF ? 4 : 8,
337 32, intr->src[0].ssa);
338 break;
339
340 default:
341 intr_name = nir_intrinsic_infos[intr->intrinsic].name;
342
343 /* No other intrinsics are expected from VAAPI and OpenMAX.
344 * (this lowering is only used by CDNA, which only uses those frontends)
345 */
346 if (strstr(intr_name, "image") == intr_name ||
347 strstr(intr_name, "bindless_image") == intr_name) {
348 fprintf(stderr, "Unexpected image opcode: ");
349 nir_print_instr(instr, stderr);
350 fprintf(stderr, "\nAborting to prevent a hang.");
351 abort();
352 }
353 return false;
354 }
355
356 switch (intr->intrinsic) {
357 case nir_intrinsic_image_load:
358 case nir_intrinsic_image_deref_load:
359 case nir_intrinsic_bindless_image_load:
360 result = emulated_image_load(b, intr->def.num_components, intr->def.bit_size,
361 desc, intr->src[1].ssa, access, dim, is_array, true);
362 nir_def_rewrite_uses_after(dst, result, instr);
363 nir_instr_remove(instr);
364 return true;
365
366 case nir_intrinsic_image_store:
367 case nir_intrinsic_image_deref_store:
368 case nir_intrinsic_bindless_image_store:
369 emulated_image_store(b, desc, intr->src[1].ssa, intr->src[3].ssa, access, dim, is_array);
370 nir_instr_remove(instr);
371 return true;
372
373 default:
374 unreachable("shouldn't get here");
375 }
376 } else if (instr->type == nir_instr_type_tex) {
377 nir_tex_instr *tex = nir_instr_as_tex(instr);
378 nir_tex_instr *new_tex;
379 nir_def *coord = NULL, *desc = NULL, *sampler_desc = NULL, *result = NULL;
380
381 nir_def *dst = &tex->def;
382 b->cursor = nir_before_instr(instr);
383
384 switch (tex->op) {
385 case nir_texop_tex:
386 case nir_texop_txl:
387 case nir_texop_txf:
388 for (unsigned i = 0; i < tex->num_srcs; i++) {
389 switch (tex->src[i].src_type) {
390 case nir_tex_src_texture_deref:
391 case nir_tex_src_texture_handle:
392 if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
393 return false;
394 new_tex = nir_tex_instr_create(b->shader, 1);
395 new_tex->op = nir_texop_descriptor_amd;
396 new_tex->sampler_dim = tex->sampler_dim;
397 new_tex->is_array = tex->is_array;
398 new_tex->texture_index = tex->texture_index;
399 new_tex->sampler_index = tex->sampler_index;
400 new_tex->dest_type = nir_type_int32;
401 new_tex->src[0].src = nir_src_for_ssa(tex->src[i].src.ssa);
402 new_tex->src[0].src_type = tex->src[i].src_type;
403 nir_def_init(&new_tex->instr, &new_tex->def,
404 nir_tex_instr_dest_size(new_tex), 32);
405 nir_builder_instr_insert(b, &new_tex->instr);
406 desc = &new_tex->def;
407 break;
408
409 case nir_tex_src_sampler_deref:
410 case nir_tex_src_sampler_handle:
411 if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
412 return false;
413 new_tex = nir_tex_instr_create(b->shader, 1);
414 new_tex->op = nir_texop_sampler_descriptor_amd;
415 new_tex->sampler_dim = tex->sampler_dim;
416 new_tex->is_array = tex->is_array;
417 new_tex->texture_index = tex->texture_index;
418 new_tex->sampler_index = tex->sampler_index;
419 new_tex->dest_type = nir_type_int32;
420 new_tex->src[0].src = nir_src_for_ssa(tex->src[i].src.ssa);
421 new_tex->src[0].src_type = tex->src[i].src_type;
422 nir_def_init(&new_tex->instr, &new_tex->def,
423 nir_tex_instr_dest_size(new_tex), 32);
424 nir_builder_instr_insert(b, &new_tex->instr);
425 sampler_desc = &new_tex->def;
426 break;
427
428 case nir_tex_src_coord:
429 coord = tex->src[i].src.ssa;
430 break;
431
432 case nir_tex_src_projector:
433 case nir_tex_src_comparator:
434 case nir_tex_src_offset:
435 case nir_tex_src_texture_offset:
436 case nir_tex_src_sampler_offset:
437 case nir_tex_src_plane:
438 unreachable("unsupported texture src");
439
440 default:;
441 }
442 }
443
444 switch (tex->op) {
445 case nir_texop_txf:
446 result = emulated_image_load(b, tex->def.num_components, tex->def.bit_size,
447 desc, coord,
448 ACCESS_RESTRICT | ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER,
449 tex->sampler_dim, tex->is_array, true);
450 nir_def_rewrite_uses_after(dst, result, instr);
451 nir_instr_remove(instr);
452 return true;
453
454 case nir_texop_tex:
455 case nir_texop_txl:
456 result = emulated_tex_level_zero(b, tex->def.num_components, tex->def.bit_size,
457 desc, sampler_desc, coord, tex->sampler_dim, tex->is_array);
458 nir_def_rewrite_uses_after(dst, result, instr);
459 nir_instr_remove(instr);
460 return true;
461
462 default:
463 unreachable("shouldn't get here");
464 }
465 break;
466
467 case nir_texop_descriptor_amd:
468 case nir_texop_sampler_descriptor_amd:
469 return false;
470
471 default:
472 fprintf(stderr, "Unexpected texture opcode: ");
473 nir_print_instr(instr, stderr);
474 fprintf(stderr, "\nAborting to prevent a hang.");
475 abort();
476 }
477 }
478
479 return false;
480 }
481
/* Run lower_image_opcodes on every instruction of the shader and return the progress
 * reported by nir_shader_instructions_pass.
 *
 * No metadata is preserved: lowering tex/txl inserts an if/else
 * (see emulated_tex_level_zero), which changes the control flow graph, so neither
 * block indices nor dominance information remain valid afterwards.
 */
bool ac_nir_lower_image_opcodes(nir_shader *nir)
{
   return nir_shader_instructions_pass(nir, lower_image_opcodes,
                                       nir_metadata_none,
                                       NULL);
}
489