/*
 * Copyright 2023 Valve Corporation
 * Copyright 2021 Alyssa Rosenzweig
 * Copyright 2020 Collabora Ltd.
 * Copyright 2016 Broadcom
 * SPDX-License-Identifier: MIT
 */

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "agx_nir.h"
#include "agx_nir_texture.h"
#include "glsl_types.h"
#include "libagx.h"
#include "nir_builder_opcodes.h"
#include "nir_builtin_builder.h"
#include "nir_intrinsics.h"
#include "nir_intrinsics_indices.h"
#include "shader_enums.h"

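/*
 * Insert a hardware fence after image writes so that subsequent texture reads
 * of the same image observe the written data: stores get a PBE-to-texture
 * fence, atomics a memory-to-texture fence. Write-only images are skipped
 * since they are never read back.
 */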
static bool
fence_image(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   b->cursor = nir_after_instr(&intr->instr);

   /* If the image is write-only, there is no fencing needed */
   if (nir_intrinsic_has_access(intr) &&
       (nir_intrinsic_access(intr) & ACCESS_NON_READABLE)) {
      return false;
   }

   switch (intr->intrinsic) {
   case nir_intrinsic_image_store:
   case nir_intrinsic_bindless_image_store:
      nir_fence_pbe_to_tex_agx(b);
      return true;

   case nir_intrinsic_image_atomic:
   case nir_intrinsic_bindless_image_atomic:
   case nir_intrinsic_image_atomic_swap:
   case nir_intrinsic_bindless_image_atomic_swap:
      nir_fence_mem_to_tex_agx(b);
      return true;

   default:
      return false;
   }
}

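/* Load a pointer to the texture descriptor from a bindless texture handle. */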
static nir_def *
texture_descriptor_ptr(nir_builder *b, nir_tex_instr *tex)
{
   int handle_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
   assert(handle_idx >= 0 && "must be bindless");
   return nir_load_from_texture_handle_agx(b, tex->src[handle_idx].src.ssa);
}

static bool
has_nonzero_lod(nir_tex_instr *tex)
{
   int idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   if (idx < 0)
      return false;

   nir_src src = tex->src[idx].src;
   return !(nir_src_is_const(src) && nir_src_as_uint(src) == 0);
}

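/*
 * Lower texture queries (txs, texture_samples, query_levels) to libagx helpers
 * that crawl the texture descriptor in memory.
 */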
static bool
lower_tex_crawl(nir_builder *b, nir_instr *instr, UNUSED void *data)
{
   if (instr->type != nir_instr_type_tex)
      return false;

   nir_tex_instr *tex = nir_instr_as_tex(instr);
   b->cursor = nir_before_instr(instr);

   if (tex->op != nir_texop_txs && tex->op != nir_texop_texture_samples &&
       tex->op != nir_texop_query_levels)
      return false;

   nir_def *ptr = texture_descriptor_ptr(b, tex);
   unsigned nr_comps = tex->def.num_components;
   assert(nr_comps <= 3);

   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   nir_def *lod = lod_idx >= 0 ? nir_u2u16(b, tex->src[lod_idx].src.ssa)
                               : nir_imm_intN_t(b, 0, 16);

   nir_def *res;
   if (tex->op == nir_texop_txs) {
      res =
         libagx_txs(b, ptr, lod, nir_imm_int(b, nr_comps),
                    nir_imm_bool(b, tex->sampler_dim == GLSL_SAMPLER_DIM_BUF),
                    nir_imm_bool(b, tex->sampler_dim == GLSL_SAMPLER_DIM_1D),
                    nir_imm_bool(b, tex->sampler_dim == GLSL_SAMPLER_DIM_2D),
                    nir_imm_bool(b, tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE),
                    nir_imm_bool(b, tex->is_array));
   } else if (tex->op == nir_texop_query_levels) {
      res = libagx_texture_levels(b, ptr);
   } else {
      res = libagx_texture_samples(b, ptr);
   }

   nir_def_rewrite_uses(&tex->def, nir_trim_vector(b, res, nr_comps));
   nir_instr_remove(instr);
   return true;
}

/*
 * Given a 1D buffer texture coordinate, calculate the 2D coordinate vector that
 * will be used to access the linear 2D texture bound to the buffer.
 */
static nir_def *
coords_for_buffer_texture(nir_builder *b, nir_def *coord)
{
   return nir_vec2(b, nir_umod_imm(b, coord, AGX_TEXTURE_BUFFER_WIDTH),
                   nir_udiv_imm(b, coord, AGX_TEXTURE_BUFFER_WIDTH));
}

/*
 * Buffer textures are lowered to 2D (1024xN) textures in the driver to access
 * more storage. When lowering, we need to fix up the coordinate accordingly.
 *
 * Furthermore, RGB32 formats are emulated by lowering to global memory access,
 * so to read a buffer texture we generate code that looks like:
 *
 *    if (descriptor->format == RGB32)
 *       return ((uint32_t *) descriptor->address)[x];
 *    else
 *       return txf(texture_as_2d, vec2(x % 1024, x / 1024));
 */
static bool
lower_buffer_texture(nir_builder *b, nir_tex_instr *tex)
{
   nir_def *coord = nir_steal_tex_src(tex, nir_tex_src_coord);
   nir_def *size = nir_get_texture_size(b, tex);
   nir_def *oob = nir_uge(b, coord, size);

   /* Apply the buffer offset after calculating oob but before remapping */
   nir_def *desc = texture_descriptor_ptr(b, tex);
   coord = libagx_buffer_texture_offset(b, desc, coord);

   /* Map out-of-bounds indices to out-of-bounds coordinates for robustness2
    * semantics from the hardware.
    */
   coord = nir_bcsel(b, oob, nir_imm_int(b, -1), coord);

   bool is_float = nir_alu_type_get_base_type(tex->dest_type) == nir_type_float;

   /* Lower RGB32 reads if the format requires. If we are out-of-bounds, we use
    * the hardware path so we get a zero texel.
    */
   nir_if *nif = nir_push_if(
      b, nir_iand(b, libagx_texture_is_rgb32(b, desc), nir_inot(b, oob)));

   nir_def *rgb32 = nir_trim_vector(
      b, libagx_texture_load_rgb32(b, desc, coord, nir_imm_bool(b, is_float)),
      nir_tex_instr_dest_size(tex));

   nir_push_else(b, nif);

   /* Otherwise, lower the texture instruction to read from 2D */
   assert(coord->num_components == 1 && "buffer textures are 1D");
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;

   nir_def *coord2d = coords_for_buffer_texture(b, coord);
   nir_instr_remove(&tex->instr);
   nir_builder_instr_insert(b, &tex->instr);
   nir_tex_instr_add_src(tex, nir_tex_src_backend1, coord2d);
   nir_steal_tex_src(tex, nir_tex_src_sampler_handle);
   nir_steal_tex_src(tex, nir_tex_src_sampler_offset);
   nir_block *else_block = nir_cursor_current_block(b->cursor);
   nir_pop_if(b, nif);

   /* Put it together with a phi */
   nir_def *phi = nir_if_phi(b, rgb32, &tex->def);
   nir_def_rewrite_uses(&tex->def, phi);
   nir_phi_instr *phi_instr = nir_instr_as_phi(phi->parent_instr);
   nir_phi_src *else_src = nir_phi_get_src_from_block(phi_instr, else_block);
   nir_src_rewrite(&else_src->src, &tex->def);
   return true;
}

/*
 * NIR indexes into array textures with unclamped floats (integer for txf). AGX
 * requires the index to be a clamped integer. Lower tex_src_coord into
 * tex_src_backend1 for array textures by type-converting and clamping.
 */
static bool
lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
{
   if (instr->type != nir_instr_type_tex)
      return false;

   nir_tex_instr *tex = nir_instr_as_tex(instr);
   b->cursor = nir_before_instr(instr);

   if (nir_tex_instr_is_query(tex) && tex->op != nir_texop_lod)
      return false;

   if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
      return lower_buffer_texture(b, tex);

   /* Don't lower twice */
   if (nir_tex_instr_src_index(tex, nir_tex_src_backend1) >= 0)
      return false;

   /* Get the coordinates */
   nir_def *coord = nir_steal_tex_src(tex, nir_tex_src_coord);
   nir_def *ms_idx = nir_steal_tex_src(tex, nir_tex_src_ms_index);

   /* Apply txf workaround, see libagx_lower_txf_robustness */
   bool is_txf = ((tex->op == nir_texop_txf) || (tex->op == nir_texop_txf_ms));

   if (is_txf && (has_nonzero_lod(tex) || tex->is_array) &&
       !(tex->backend_flags & AGX_TEXTURE_FLAG_NO_CLAMP)) {
      int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
      nir_def *lod =
         lod_idx >= 0 ? tex->src[lod_idx].src.ssa : nir_undef(b, 1, 16);

      unsigned lidx = coord->num_components - 1;
      nir_def *layer = nir_channel(b, coord, lidx);

      nir_def *replaced = libagx_lower_txf_robustness(
         b, texture_descriptor_ptr(b, tex),
         nir_imm_bool(b, has_nonzero_lod(tex)), lod,
         nir_imm_bool(b, tex->is_array), layer, nir_channel(b, coord, 0));

      coord = nir_vector_insert_imm(b, coord, replaced, 0);
   }

   /* The layer is always the last component of the NIR coordinate, split it off
    * because we'll need to swizzle.
    */
   nir_def *layer = NULL;

   if (tex->is_array && tex->op != nir_texop_lod) {
      unsigned lidx = coord->num_components - 1;
      nir_def *unclamped_layer = nir_channel(b, coord, lidx);
      coord = nir_trim_vector(b, coord, lidx);

      /* Round layer to nearest even */
      if (!is_txf) {
         unclamped_layer = nir_fround_even(b, unclamped_layer);

         /* Explicitly round negative to avoid undefined behaviour when constant
          * folding. This is load bearing on x86 builds.
          */
         unclamped_layer =
            nir_f2u32(b, nir_fmax(b, unclamped_layer, nir_imm_float(b, 0.0f)));
      }

      /* For a cube array, the layer is zero-indexed component 3 of the
       * coordinate but the number of layers is component 2 of the txs result.
       */
      if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
         assert(lidx == 3 && "4 components");
         lidx = 2;
      }

      /* Clamp to max layer = (# of layers - 1) for out-of-bounds handling.
       * Layer must be 16-bits for the hardware, drop top bits after clamping.
       *
       * For txf, we drop out-of-bounds components rather than clamp, see the
       * above txf robustness workaround.
       */
      if (!(tex->backend_flags & AGX_TEXTURE_FLAG_NO_CLAMP) && !is_txf) {
         nir_def *txs = nir_get_texture_size(b, tex);
         nir_def *nr_layers = nir_channel(b, txs, lidx);
         nir_def *max_layer = nir_iadd_imm(b, nr_layers, -1);
         layer = nir_umin(b, unclamped_layer, max_layer);
      } else {
         layer = unclamped_layer;
      }

      layer = nir_u2u16(b, layer);
   }

   /* Combine layer and multisample index into 32-bit so we don't need a vec5 or
    * vec6 16-bit coordinate tuple, which would be inconvenient in NIR for
    * little benefit (a minor optimization, I guess).
    */
   nir_def *sample_array = (ms_idx && layer)
                              ? nir_pack_32_2x16_split(b, ms_idx, layer)
                           : ms_idx ? nir_u2u32(b, ms_idx)
                           : layer  ? nir_u2u32(b, layer)
                                    : NULL;

   /* Combine into the final 32-bit tuple */
   if (sample_array != NULL) {
      unsigned end = coord->num_components;
      coord = nir_pad_vector(b, coord, end + 1);
      coord = nir_vector_insert_imm(b, coord, sample_array, end);
   }

   nir_tex_instr_add_src(tex, nir_tex_src_backend1, coord);

   /* Furthermore, if there is an offset vector, it must be packed */
   nir_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset);

   if (offset != NULL) {
      nir_def *packed = NULL;

      for (unsigned c = 0; c < offset->num_components; ++c) {
         nir_def *nibble = nir_iand_imm(b, nir_channel(b, offset, c), 0xF);
         nir_def *shifted = nir_ishl_imm(b, nibble, 4 * c);

         if (packed != NULL)
            packed = nir_ior(b, packed, shifted);
         else
            packed = shifted;
      }

      nir_tex_instr_add_src(tex, nir_tex_src_backend2, packed);
   }

   /* We reserve bound sampler #0, so we offset bound samplers by 1 and
    * otherwise map bound samplers as-is.
    */
   nir_def *sampler = nir_steal_tex_src(tex, nir_tex_src_sampler_offset);
   if (!sampler)
      sampler = nir_imm_intN_t(b, tex->sampler_index, 16);

   if (!is_txf &&
       nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) < 0) {

      nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle,
                            nir_iadd_imm(b, nir_u2u16(b, sampler), 1));
   }

   return true;
}

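/* Query the sampler's LOD bias as a 16-bit float via a backend texture op. */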
static nir_def *
bias_for_tex(nir_builder *b, nir_tex_instr *tex)
{
   return nir_build_texture_query(b, tex, nir_texop_lod_bias_agx, 1,
                                  nir_type_float16, false, false);
}

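/*
 * Fold the sampler's LOD bias into the texture instruction itself: tex becomes
 * txb, txb/txl add the bias to their existing bias/LOD source, txd scales its
 * derivatives by exp2(bias), and nir_texop_lod gets the bias as an extra
 * source. Operations that do not use a sampler are left alone.
 */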
static bool
lower_sampler_bias(nir_builder *b, nir_instr *instr, UNUSED void *data)
{
   if (instr->type != nir_instr_type_tex)
      return false;

   nir_tex_instr *tex = nir_instr_as_tex(instr);
   b->cursor = nir_before_instr(instr);

   switch (tex->op) {
   case nir_texop_tex: {
      tex->op = nir_texop_txb;
      nir_tex_instr_add_src(tex, nir_tex_src_bias, bias_for_tex(b, tex));
      return true;
   }

   case nir_texop_txb:
   case nir_texop_txl: {
      nir_tex_src_type src =
         tex->op == nir_texop_txl ? nir_tex_src_lod : nir_tex_src_bias;

      nir_def *orig = nir_steal_tex_src(tex, src);
      assert(orig != NULL && "invalid NIR");

      if (orig->bit_size != 16)
         orig = nir_f2f16(b, orig);

      nir_tex_instr_add_src(tex, src, nir_fadd(b, orig, bias_for_tex(b, tex)));
      return true;
   }

   case nir_texop_txd: {
      /* For txd, the computed level-of-detail is log2(rho)
       * where rho should scale proportionally to all
       * derivatives. So scale derivatives by exp2(bias) to
       * get level-of-detail log2(exp2(bias) * rho) = bias + log2(rho).
       */
      nir_def *scale = nir_fexp2(b, nir_f2f32(b, bias_for_tex(b, tex)));
      nir_tex_src_type src[] = {nir_tex_src_ddx, nir_tex_src_ddy};

      for (unsigned s = 0; s < ARRAY_SIZE(src); ++s) {
         nir_def *orig = nir_steal_tex_src(tex, src[s]);
         assert(orig != NULL && "invalid");

         nir_def *scaled = nir_fmul(b, nir_f2f32(b, orig), scale);
         nir_tex_instr_add_src(tex, src[s], scaled);
      }

      return true;
   }

   case nir_texop_lod: {
      nir_tex_instr_add_src(tex, nir_tex_src_bias, bias_for_tex(b, tex));
      return true;
   }

   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_txs:
   case nir_texop_tg4:
   case nir_texop_texture_samples:
   case nir_texop_samples_identical:
   case nir_texop_query_levels:
      /* These operations do not use a sampler */
      return false;

   default:
      unreachable("Unhandled texture operation");
   }
}

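/* Narrow the LOD source of image load/store/size intrinsics to 16-bit. */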
static bool
legalize_image_lod(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
{
   nir_src *src;

#define CASE(op, idx)                                                          \
   case nir_intrinsic_##op:                                                    \
   case nir_intrinsic_bindless_##op:                                           \
      src = &intr->src[idx];                                                   \
      break;

   switch (intr->intrinsic) {
      CASE(image_load, 3)
      CASE(image_store, 4)
      CASE(image_size, 1)
   default:
      return false;
   }

#undef CASE

   if (src->ssa->bit_size == 16)
      return false;

   b->cursor = nir_before_instr(&intr->instr);
   nir_src_rewrite(src, nir_i2i16(b, src->ssa));
   return true;
}

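/*
 * Build the txs (or texture_samples) query corresponding to an image_size (or
 * image_samples) intrinsic, reusing the image's bindless handle. Cube images
 * are backed by 2D arrays, so their layer count gets divided by 6.
 */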
static nir_def *
txs_for_image(nir_builder *b, nir_intrinsic_instr *intr,
              unsigned num_components, unsigned bit_size, bool query_samples)
{
   nir_tex_instr *tex = nir_tex_instr_create(b->shader, query_samples ? 1 : 2);
   tex->op = query_samples ? nir_texop_texture_samples : nir_texop_txs;
   tex->is_array = nir_intrinsic_image_array(intr);
   tex->dest_type = nir_type_uint32;
   tex->sampler_dim = nir_intrinsic_image_dim(intr);

   tex->src[0] =
      nir_tex_src_for_ssa(nir_tex_src_texture_handle, intr->src[0].ssa);

   if (!query_samples)
      tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_lod, intr->src[1].ssa);

   nir_def_init(&tex->instr, &tex->def, num_components, bit_size);
   nir_builder_instr_insert(b, &tex->instr);
   nir_def *res = &tex->def;

   /* Cube images are implemented as 2D arrays, so we need to divide here. */
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && res->num_components > 2 &&
       !query_samples) {
      nir_def *divided = nir_udiv_imm(b, nir_channel(b, res, 2), 6);
      res = nir_vector_insert_imm(b, res, divided, 2);
   }

   return res;
}

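/*
 * Compute the memory address of an image texel (or, with return_index, its
 * linear texel index) by crawling the PBE descriptor through libagx helpers.
 */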
static nir_def *
image_texel_address(nir_builder *b, nir_intrinsic_instr *intr,
                    bool return_index)
{
   /* First, calculate the address of the PBE descriptor */
   nir_def *desc_address =
      nir_load_from_texture_handle_agx(b, intr->src[0].ssa);

   nir_def *coord = intr->src[1].ssa;

   /* For atomics, we always infer the format. We only go down this path with
    * formatless intrinsics when lowering multisampled image stores, but that
    * uses the return_index path that ignores the block size.
    */
   enum pipe_format format = nir_intrinsic_format(intr);
   assert(return_index || format != PIPE_FORMAT_NONE);

   nir_def *blocksize_B = nir_imm_int(b, util_format_get_blocksize(format));

   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr);
   bool layered = nir_intrinsic_image_array(intr) ||
                  (dim == GLSL_SAMPLER_DIM_CUBE) ||
                  (dim == GLSL_SAMPLER_DIM_3D);

   if (dim == GLSL_SAMPLER_DIM_BUF && return_index) {
      return nir_channel(b, coord, 0);
   } else if (dim == GLSL_SAMPLER_DIM_BUF) {
      return libagx_buffer_texel_address(b, desc_address, coord, blocksize_B);
   } else {
      return libagx_image_texel_address(
         b, desc_address, coord, nir_u2u32(b, intr->src[2].ssa), blocksize_B,
         nir_imm_bool(b, dim == GLSL_SAMPLER_DIM_1D),
         nir_imm_bool(b, dim == GLSL_SAMPLER_DIM_MS), nir_imm_bool(b, layered),
         nir_imm_bool(b, return_index));
   }
}

static void
lower_buffer_image(nir_builder *b, nir_intrinsic_instr *intr)
{
   nir_def *coord_vector = intr->src[1].ssa;
   nir_def *coord = nir_channel(b, coord_vector, 0);

   /* If we're not bindless, assume we don't need an offset (GL driver) */
   if (intr->intrinsic == nir_intrinsic_bindless_image_load) {
      nir_def *desc = nir_load_from_texture_handle_agx(b, intr->src[0].ssa);
      coord = libagx_buffer_texture_offset(b, desc, coord);
   } else if (intr->intrinsic == nir_intrinsic_bindless_image_store) {
      nir_def *desc = nir_load_from_texture_handle_agx(b, intr->src[0].ssa);
      coord = libagx_buffer_image_offset(b, desc, coord);
   }

   /* Lower the buffer load/store to a 2D image load/store, matching the 2D
    * texture/PBE descriptor the driver supplies for buffer images.
    */
   nir_def *coord2d = coords_for_buffer_texture(b, coord);
   nir_src_rewrite(&intr->src[1], nir_pad_vector(b, coord2d, 4));
   nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
}

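/* Lower 1D image access to 2D by inserting a zero y-coordinate (placed before
 * the layer for 1D arrays) and switching the intrinsic's dim to 2D.
 */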
static void
lower_1d_image(nir_builder *b, nir_intrinsic_instr *intr)
{
   nir_def *coord = intr->src[1].ssa;
   bool is_array = nir_intrinsic_image_array(intr);
   nir_def *zero = nir_imm_intN_t(b, 0, coord->bit_size);

   if (is_array) {
      assert(coord->num_components >= 2);
      coord =
         nir_vec3(b, nir_channel(b, coord, 0), zero, nir_channel(b, coord, 1));
   } else {
      assert(coord->num_components >= 1);
      coord = nir_vec2(b, coord, zero);
   }

   nir_src_rewrite(&intr->src[1], nir_pad_vector(b, coord, 4));
   nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
}

/*
 * Just like for txf, we need special handling around layers (and LODs, but we
 * don't support mipmapped images yet) for robust image_loads. See
 * libagx_lower_txf_robustness for more info.
 */
static bool
lower_image_load_robustness(nir_builder *b, nir_intrinsic_instr *intr)
{
   if (nir_intrinsic_access(intr) & ACCESS_IN_BOUNDS_AGX)
      return false;

   /* We only need to worry about array-like loads */
   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr);
   if (!nir_intrinsic_image_array(intr) && dim != GLSL_SAMPLER_DIM_CUBE)
      return false;

   /* Determine the coordinate component of the layer. Cubes and cube arrays
    * keep their array in their last non-array coordinate component, other
    * arrays are immediately after.
    */
   unsigned lidx = glsl_get_sampler_dim_coordinate_components(dim);
   if (dim == GLSL_SAMPLER_DIM_CUBE)
      lidx--;

   nir_def *coord = intr->src[1].ssa;
   nir_def *lod = nir_undef(b, 1, 16);
   nir_def *layer = nir_channel(b, coord, lidx);

   /* image_load is effectively the same as txf, reuse the txf lower */
   nir_def *replaced = libagx_lower_txf_robustness(
      b, nir_load_from_texture_handle_agx(b, intr->src[0].ssa),
      nir_imm_bool(b, false /* lower LOD */), lod,
      nir_imm_bool(b, true /* lower layer */), layer, nir_channel(b, coord, 0));

   nir_src_rewrite(&intr->src[1], nir_vector_insert_imm(b, coord, replaced, 0));
   return true;
}

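/*
 * Main image lowering: legalize the multisample index, apply robustness
 * lowering to array/cube image loads, remap 1D and buffer images to 2D, and
 * turn bindless image size/samples/texel_address queries into descriptor
 * crawls.
 */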
static bool
lower_images(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
{
   b->cursor = nir_before_instr(&intr->instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_store:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_bindless_image_store: {
      /* Legalize MSAA index */
      nir_src_rewrite(&intr->src[2], nir_u2u16(b, intr->src[2].ssa));

      if (intr->intrinsic == nir_intrinsic_image_load ||
          intr->intrinsic == nir_intrinsic_bindless_image_load) {
         lower_image_load_robustness(b, intr);
      }

      switch (nir_intrinsic_image_dim(intr)) {
      case GLSL_SAMPLER_DIM_1D:
         lower_1d_image(b, intr);
         return true;

      case GLSL_SAMPLER_DIM_BUF:
         lower_buffer_image(b, intr);
         return true;

      default:
         return true;
      }
   }

   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_bindless_image_samples:
      nir_def_rewrite_uses(
         &intr->def,
         txs_for_image(
            b, intr, intr->def.num_components, intr->def.bit_size,
            intr->intrinsic == nir_intrinsic_bindless_image_samples));
      return true;

   case nir_intrinsic_bindless_image_texel_address:
      nir_def_rewrite_uses(&intr->def, image_texel_address(b, intr, false));
      return true;

   case nir_intrinsic_image_size:
   case nir_intrinsic_image_texel_address:
      unreachable("should've been lowered");

   default:
      return false;
   }
}

/*
 * Map out-of-bounds storage texel buffer accesses and multisampled image stores
 * to out-of-bounds indices, which will become an out-of-bounds hardware access.
 * This gives cheap robustness2.
 */
static bool
lower_robustness(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
{
   b->cursor = nir_before_instr(&intr->instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_store:
      break;
   default:
      return false;
   }

   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr);
   bool array = nir_intrinsic_image_array(intr);
   unsigned size_components = nir_image_intrinsic_coord_components(intr);

   nir_def *deref = intr->src[0].ssa;
   nir_def *coord = intr->src[1].ssa;

   if (dim != GLSL_SAMPLER_DIM_BUF &&
       !(dim == GLSL_SAMPLER_DIM_MS &&
         intr->intrinsic == nir_intrinsic_image_deref_store))
      return false;

   /* Bounds check the coordinate */
   nir_def *size =
      nir_image_deref_size(b, size_components, 32, deref, nir_imm_int(b, 0),
                           .image_dim = dim, .image_array = array);
   nir_def *oob = nir_bany(b, nir_uge(b, coord, size));

   /* Bounds check the sample */
   if (dim == GLSL_SAMPLER_DIM_MS) {
      nir_def *samples = nir_image_deref_samples(b, 32, deref, .image_dim = dim,
                                                 .image_array = array);

      oob = nir_ior(b, oob, nir_uge(b, intr->src[2].ssa, samples));
   }

   /* Replace the last coordinate component with a large coordinate for
    * out-of-bounds. We pick 0xFFF0 as it fits in 16-bit, and it is not signed
    * as 32-bit so we won't get in-bounds coordinates for arrays due to two's
    * complement wraparound. Additionally it still meets this requirement after
    * adding 0xF, the maximum tail offset.
    *
    * This ensures the resulting hardware coordinate is definitely
    * out-of-bounds, giving hardware-level robustness2 behaviour.
    */
   unsigned c = size_components - 1;
   nir_def *r =
      nir_bcsel(b, oob, nir_imm_int(b, 0xFFF0), nir_channel(b, coord, c));

   nir_src_rewrite(&intr->src[1], nir_vector_insert_imm(b, coord, r, c));
   return true;
}

/*
 * Early texture lowering passes, called by the driver before lowering
 * descriptor bindings. That means these passes operate on texture derefs. The
 * purpose is to make descriptor crawls explicit in the NIR, so that the driver
 * can accurately lower descriptors after this pass but before calling
 * the full agx_nir_lower_texture.
 */
bool
agx_nir_lower_texture_early(nir_shader *s, bool support_lod_bias)
{
   bool progress = false;

   NIR_PASS(progress, s, nir_shader_intrinsics_pass, lower_robustness,
            nir_metadata_control_flow, NULL);

   nir_lower_tex_options lower_tex_options = {
      .lower_txp = ~0,
      .lower_invalid_implicit_lod = true,
      .lower_tg4_offsets = true,
      .lower_index_to_offset = true,

      /* Unclear if/how mipmapped 1D textures work in the hardware. */
      .lower_1d = true,

      /* XXX: Metal seems to handle just like 3D txd, so why doesn't it work?
       * TODO: Stop using this lowering
       */
      .lower_txd_cube_map = true,
   };

   NIR_PASS(progress, s, nir_lower_tex, &lower_tex_options);

   /* Lower bias after nir_lower_tex (to get rid of txd) but before
    * lower_regular_texture (which will shuffle around the sources)
    */
   if (support_lod_bias) {
      NIR_PASS(progress, s, nir_shader_instructions_pass, lower_sampler_bias,
               nir_metadata_control_flow, NULL);
   }

   return progress;
}

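/*
 * Late texture/image lowering, run once the driver has lowered descriptor
 * bindings to bindless handles: insert image fences, lower image atomics to
 * global memory access, legalize 16-bit sources, and lower texture/image
 * instructions to the hardware-friendly forms implemented above.
 */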
bool
agx_nir_lower_texture(nir_shader *s)
{
   bool progress = false;

   nir_tex_src_type_constraints tex_constraints = {
      [nir_tex_src_lod] = {true, 16},
      [nir_tex_src_bias] = {true, 16},
      [nir_tex_src_ms_index] = {true, 16},
      [nir_tex_src_min_lod] = {true, 16},
      [nir_tex_src_texture_offset] = {true, 16},
      [nir_tex_src_sampler_offset] = {true, 16},
   };

   /* Insert fences before lowering image atomics, since image atomics need
    * different fencing than other image operations.
    */
   NIR_PASS(progress, s, nir_shader_intrinsics_pass, fence_image,
            nir_metadata_control_flow, NULL);

   NIR_PASS(progress, s, nir_lower_image_atomics_to_global);

   NIR_PASS(progress, s, nir_shader_intrinsics_pass, legalize_image_lod,
            nir_metadata_control_flow, NULL);
   NIR_PASS(progress, s, nir_shader_intrinsics_pass, lower_images,
            nir_metadata_control_flow, NULL);
   NIR_PASS(progress, s, nir_legalize_16bit_sampler_srcs, tex_constraints);

   /* Fold constants after nir_legalize_16bit_sampler_srcs so we can detect 0 in
    * lower_regular_texture. This is required for correctness.
    */
   NIR_PASS(progress, s, nir_opt_constant_folding);

   /* Lower texture sources after legalizing types (as the lowering depends on
    * 16-bit multisample indices) but before lowering queries (as the lowering
    * generates txs for array textures).
    */
   NIR_PASS(progress, s, nir_shader_instructions_pass, lower_regular_texture,
            nir_metadata_none, NULL);
   NIR_PASS(progress, s, nir_shader_instructions_pass, lower_tex_crawl,
            nir_metadata_control_flow, NULL);

   return progress;
}

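/*
 * Lower a multisampled image store into a single-sample 2D store addressed by
 * the linear texel index (computed via image_texel_address), with the sample
 * index folded into that index.
 */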
static bool
lower_multisampled_store(nir_builder *b, nir_intrinsic_instr *intr,
                         UNUSED void *data)
{
   b->cursor = nir_before_instr(&intr->instr);

   if (intr->intrinsic != nir_intrinsic_bindless_image_store)
      return false;

   if (nir_intrinsic_image_dim(intr) != GLSL_SAMPLER_DIM_MS)
      return false;

   nir_def *index_px = nir_u2u32(b, image_texel_address(b, intr, true));
   nir_def *coord2d = coords_for_buffer_texture(b, index_px);

   nir_src_rewrite(&intr->src[1], nir_pad_vector(b, coord2d, 4));
   nir_src_rewrite(&intr->src[2], nir_imm_int(b, 0));
   nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
   nir_intrinsic_set_image_array(intr, false);
   return true;
}

bool
agx_nir_lower_multisampled_image_store(nir_shader *s)
{
   return nir_shader_intrinsics_pass(s, lower_multisampled_store,
                                     nir_metadata_control_flow, NULL);
}

/*
 * Given a non-bindless instruction, return whether agx_nir_lower_texture will
 * lower it to something involving a descriptor crawl. This requires the driver
 * to lower the instruction to bindless before calling agx_nir_lower_texture.
 * The implementation just enumerates the cases handled in this file.
 */
bool
agx_nir_needs_texture_crawl(nir_instr *instr)
{
   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      /* Queries, atomics always become a crawl */
      case nir_intrinsic_image_size:
      case nir_intrinsic_image_deref_size:
      case nir_intrinsic_image_samples:
      case nir_intrinsic_image_deref_samples:
      case nir_intrinsic_image_atomic:
      case nir_intrinsic_image_deref_atomic:
      case nir_intrinsic_image_atomic_swap:
      case nir_intrinsic_image_deref_atomic_swap:
         return true;

      /* Multisampled stores need a crawl, others do not */
      case nir_intrinsic_image_store:
      case nir_intrinsic_image_deref_store:
         return nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_MS;

      /* Array loads need a crawl, other loads do not */
      case nir_intrinsic_image_load:
         return nir_intrinsic_image_array(intr) ||
                nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_CUBE;

      default:
         return false;
      }
   } else if (instr->type == nir_instr_type_tex) {
      nir_tex_instr *tex = nir_instr_as_tex(instr);

      /* Array textures get clamped to their size via txs */
      if (tex->is_array && !(tex->backend_flags & AGX_TEXTURE_FLAG_NO_CLAMP))
         return true;

      switch (tex->op) {
      /* Queries always become a crawl */
      case nir_texop_txs:
      case nir_texop_texture_samples:
      case nir_texop_query_levels:
         return true;

      /* Buffer textures need their format read and txf needs its LOD/layer
       * clamped.  Buffer textures are only read through txf.
       */
      case nir_texop_txf:
      case nir_texop_txf_ms:
         return has_nonzero_lod(tex) || tex->is_array ||
                tex->sampler_dim == GLSL_SAMPLER_DIM_BUF;

      default:
         return false;
      }
   }

   return false;
}