/*
 * Copyright 2023 Valve Corporation
 * Copyright 2021 Alyssa Rosenzweig
 * Copyright 2020 Collabora Ltd.
 * Copyright 2016 Broadcom
 * SPDX-License-Identifier: MIT
 */

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "agx_internal_formats.h"
#include "agx_nir_passes.h"
#include "libagx_shaders.h"
#include "nir_builtin_builder.h"

static bool
fence_image(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   b->cursor = nir_after_instr(&intr->instr);

   /* If the image is write-only, there is no fencing needed */
   if (nir_intrinsic_has_access(intr) &&
       (nir_intrinsic_access(intr) & ACCESS_NON_READABLE)) {
      return false;
   }

   switch (intr->intrinsic) {
   case nir_intrinsic_image_store:
   case nir_intrinsic_bindless_image_store:
      nir_fence_pbe_to_tex_agx(b);
      return true;

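   /* Atomics are lowered to global memory access (see
    * nir_lower_image_atomics_to_global in agx_nir_lower_texture below), hence
    * the memory-to-texture fence here rather than the PBE-to-texture fence
    * used for stores.
    */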
   case nir_intrinsic_image_atomic:
   case nir_intrinsic_bindless_image_atomic:
   case nir_intrinsic_image_atomic_swap:
   case nir_intrinsic_bindless_image_atomic_swap:
      nir_fence_mem_to_tex_agx(b);
      return true;

   default:
      return false;
   }
}

static nir_def *
texture_descriptor_ptr(nir_builder *b, nir_tex_instr *tex)
{
   int handle_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
   assert(handle_idx >= 0 && "must be bindless");
   return nir_load_from_texture_handle_agx(b, tex->src[handle_idx].src.ssa);
}

static bool
has_nonzero_lod(nir_tex_instr *tex)
{
   int idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   if (idx < 0)
      return false;

   nir_src src = tex->src[idx].src;
   return !(nir_src_is_const(src) && nir_src_as_uint(src) == 0);
}

static bool
lower_tex_crawl(nir_builder *b, nir_instr *instr, UNUSED void *data)
{
   if (instr->type != nir_instr_type_tex)
      return false;

   nir_tex_instr *tex = nir_instr_as_tex(instr);
   b->cursor = nir_before_instr(instr);

   if (tex->op != nir_texop_txs && tex->op != nir_texop_texture_samples &&
       tex->op != nir_texop_query_levels)
      return false;

   nir_def *ptr = texture_descriptor_ptr(b, tex);
   unsigned nr_comps = tex->def.num_components;
   assert(nr_comps <= 3);

   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   nir_def *lod = lod_idx >= 0 ? nir_u2u16(b, tex->src[lod_idx].src.ssa)
                               : nir_imm_intN_t(b, 0, 16);

   nir_def *res;
   if (tex->op == nir_texop_txs) {
      res =
         libagx_txs(b, ptr, lod, nir_imm_int(b, nr_comps),
                    nir_imm_bool(b, tex->sampler_dim == GLSL_SAMPLER_DIM_BUF),
                    nir_imm_bool(b, tex->sampler_dim == GLSL_SAMPLER_DIM_1D),
                    nir_imm_bool(b, tex->sampler_dim == GLSL_SAMPLER_DIM_2D),
                    nir_imm_bool(b, tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE),
                    nir_imm_bool(b, tex->is_array));
   } else if (tex->op == nir_texop_query_levels) {
      res = libagx_texture_levels(b, ptr);
   } else {
      res = libagx_texture_samples(b, ptr);
   }

   nir_def_rewrite_uses(&tex->def, nir_trim_vector(b, res, nr_comps));
   nir_instr_remove(instr);
   return true;
}

/*
 * Given a 1D buffer texture coordinate, calculate the 2D coordinate vector that
 * will be used to access the linear 2D texture bound to the buffer.
 */
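/* For example, a texel index of 2500 splits into
 * (2500 & 1023, 2500 >> 10) = (452, 2) in the 1024-wide 2D view.
 */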
static nir_def *
coords_for_buffer_texture(nir_builder *b, nir_def *coord)
{
   return nir_vec2(b, nir_iand_imm(b, coord, BITFIELD_MASK(10)),
                   nir_ushr_imm(b, coord, 10));
}

/*
 * Buffer textures are lowered to 2D (1024xN) textures in the driver to access
 * more storage. When lowering, we need to fix up the coordinate accordingly.
 *
 * Furthermore, RGB32 formats are emulated by lowering to global memory access,
 * so to read a buffer texture we generate code that looks like:
 *
 *    if (descriptor->format == RGB32)
 *       return ((uint32_t *) descriptor->address)[x];
 *    else
 *       return txf(texture_as_2d, vec2(x % 1024, x / 1024));
 */
static bool
lower_buffer_texture(nir_builder *b, nir_tex_instr *tex)
{
   nir_def *coord = nir_steal_tex_src(tex, nir_tex_src_coord);

   /* The OpenGL ES 3.2 specification says on page 187:
    *
    *    When a buffer texture is accessed in a shader, the results of a texel
    *    fetch are undefined if the specified texel coordinate is negative, or
    *    greater than or equal to the clamped number of texels in the texture
    *    image.
    *
    * However, faulting would be undesirable for robustness, so clamp.
    */
   nir_def *size = nir_get_texture_size(b, tex);
   coord = nir_umin(b, coord, nir_iadd_imm(b, size, -1));

   nir_def *desc = texture_descriptor_ptr(b, tex);
   bool is_float = nir_alu_type_get_base_type(tex->dest_type) == nir_type_float;

   /* Lower RGB32 reads if the format requires it */
   nir_if *nif = nir_push_if(b, libagx_texture_is_rgb32(b, desc));

   nir_def *rgb32 = nir_trim_vector(
      b, libagx_texture_load_rgb32(b, desc, coord, nir_imm_bool(b, is_float)),
      nir_tex_instr_dest_size(tex));

   nir_push_else(b, nif);

   /* Otherwise, lower the texture instruction to read from 2D */
   assert(coord->num_components == 1 && "buffer textures are 1D");
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;

   nir_def *coord2d = coords_for_buffer_texture(b, coord);
   nir_instr_remove(&tex->instr);
   nir_builder_instr_insert(b, &tex->instr);
   nir_tex_instr_add_src(tex, nir_tex_src_backend1, coord2d);
   nir_block *else_block = nir_cursor_current_block(b->cursor);
   nir_pop_if(b, nif);

   /* Put it together with a phi */
   nir_def *phi = nir_if_phi(b, rgb32, &tex->def);
   nir_def_rewrite_uses(&tex->def, phi);
   nir_phi_instr *phi_instr = nir_instr_as_phi(phi->parent_instr);
   nir_phi_src *else_src = nir_phi_get_src_from_block(phi_instr, else_block);
   nir_src_rewrite(&else_src->src, &tex->def);
   return true;
}

/*
 * NIR indexes into array textures with unclamped floats (integer for txf). AGX
 * requires the index to be a clamped integer. Lower tex_src_coord into
 * tex_src_backend1 for array textures by type-converting and clamping.
 */
static bool
lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
{
   if (instr->type != nir_instr_type_tex)
      return false;

   nir_tex_instr *tex = nir_instr_as_tex(instr);
   b->cursor = nir_before_instr(instr);

   if (nir_tex_instr_is_query(tex) && tex->op != nir_texop_lod)
      return false;

   if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
      return lower_buffer_texture(b, tex);

   /* Don't lower twice */
   if (nir_tex_instr_src_index(tex, nir_tex_src_backend1) >= 0)
      return false;

   /* Get the coordinates */
   nir_def *coord = nir_steal_tex_src(tex, nir_tex_src_coord);
   nir_def *ms_idx = nir_steal_tex_src(tex, nir_tex_src_ms_index);

   /* Apply the txf workaround; see libagx_lower_txf_robustness */
   bool is_txf = ((tex->op == nir_texop_txf) || (tex->op == nir_texop_txf_ms));

   if (is_txf && has_nonzero_lod(tex) &&
       !(tex->backend_flags & AGX_TEXTURE_FLAG_NO_CLAMP)) {

      int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);

      nir_def *replaced = libagx_lower_txf_robustness(
         b, texture_descriptor_ptr(b, tex), tex->src[lod_idx].src.ssa,
         nir_channel(b, coord, 0));

      coord = nir_vector_insert_imm(b, coord, replaced, 0);
   }

   /* The layer is always the last component of the NIR coordinate. Split it
    * off, since we will need to swizzle.
    */
   nir_def *layer = NULL;

   if (tex->is_array) {
      unsigned lidx = coord->num_components - 1;
      nir_def *unclamped_layer = nir_channel(b, coord, lidx);
      coord = nir_trim_vector(b, coord, lidx);

      /* Round the layer to the nearest integer (ties to even) */
      if (!is_txf)
         unclamped_layer = nir_f2u32(b, nir_fround_even(b, unclamped_layer));

      /* For a cube array, the layer is zero-indexed component 3 of the
       * coordinate, but the number of layers is component 2 of the txs result.
       */
      if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
         assert(lidx == 3 && "4 components");
         lidx = 2;
      }

      /* Clamp to max layer = (# of layers - 1) for out-of-bounds handling. The
       * layer must be 16 bits for the hardware, so drop the top bits after
       * clamping.
       */
      if (!(tex->backend_flags & AGX_TEXTURE_FLAG_NO_CLAMP)) {
         nir_def *txs = nir_get_texture_size(b, tex);
         nir_def *nr_layers = nir_channel(b, txs, lidx);
         nir_def *max_layer = nir_iadd_imm(b, nr_layers, -1);
         layer = nir_umin(b, unclamped_layer, max_layer);
      } else {
         layer = unclamped_layer;
      }

      layer = nir_u2u16(b, layer);
   }

   /* Combine the layer and the multisample index into a single 32-bit value,
    * so we don't need a vec5 or vec6 16-bit coordinate tuple, which would be
    * inconvenient in NIR for little benefit.
    */
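   /* For example, ms_idx = 1 with layer = 5 packs to 0x00050001, with the
    * multisample index in the low half.
    */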
   nir_def *sample_array = (ms_idx && layer)
                              ? nir_pack_32_2x16_split(b, ms_idx, layer)
                           : ms_idx ? nir_u2u32(b, ms_idx)
                           : layer  ? nir_u2u32(b, layer)
                                    : NULL;

   /* Combine into the final 32-bit tuple */
   if (sample_array != NULL) {
      unsigned end = coord->num_components;
      coord = nir_pad_vector(b, coord, end + 1);
      coord = nir_vector_insert_imm(b, coord, sample_array, end);
   }

   nir_tex_instr_add_src(tex, nir_tex_src_backend1, coord);

   /* Furthermore, if there is an offset vector, it must be packed */
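   /* Each component keeps its low 4 bits (the two's complement nibble for
    * offsets in [-8, 7]): for example, offsets (1, -2) pack as
    * 0x1 | ((-2 & 0xF) << 4) = 0xE1.
    */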
   nir_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset);

   if (offset != NULL) {
      nir_def *packed = NULL;

      for (unsigned c = 0; c < offset->num_components; ++c) {
         nir_def *nibble = nir_iand_imm(b, nir_channel(b, offset, c), 0xF);
         nir_def *shifted = nir_ishl_imm(b, nibble, 4 * c);

         if (packed != NULL)
            packed = nir_ior(b, packed, shifted);
         else
            packed = shifted;
      }

      nir_tex_instr_add_src(tex, nir_tex_src_backend2, packed);
   }

   return true;
}

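/*
 * Build a query returning the sampler's LOD bias as a 16-bit float. We reuse
 * nir_get_texture_size to construct a tex instruction with the right sources,
 * then rewrite it into a lod_bias_agx query.
 */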
static nir_def *
bias_for_tex(nir_builder *b, nir_tex_instr *tex)
{
   nir_instr *instr = nir_get_texture_size(b, tex)->parent_instr;
   nir_tex_instr *query = nir_instr_as_tex(instr);

   query->op = nir_texop_lod_bias_agx;
   query->dest_type = nir_type_float16;

   nir_def_init(instr, &query->def, 1, 16);
   return &query->def;
}

static bool
lower_sampler_bias(nir_builder *b, nir_instr *instr, UNUSED void *data)
{
   if (instr->type != nir_instr_type_tex)
      return false;

   nir_tex_instr *tex = nir_instr_as_tex(instr);
   b->cursor = nir_before_instr(instr);

   switch (tex->op) {
   case nir_texop_tex: {
      tex->op = nir_texop_txb;
      nir_tex_instr_add_src(tex, nir_tex_src_bias, bias_for_tex(b, tex));
      return true;
   }

   case nir_texop_txb:
   case nir_texop_txl: {
      nir_tex_src_type src =
         tex->op == nir_texop_txl ? nir_tex_src_lod : nir_tex_src_bias;

      nir_def *orig = nir_steal_tex_src(tex, src);
      assert(orig != NULL && "invalid NIR");

      if (orig->bit_size != 16)
         orig = nir_f2f16(b, orig);

      nir_tex_instr_add_src(tex, src, nir_fadd(b, orig, bias_for_tex(b, tex)));
      return true;
   }

   case nir_texop_txd: {
      /* For txd, the computed level-of-detail is log2(rho), where rho scales
       * proportionally to all derivatives. So scale the derivatives by
       * exp2(bias) to get level-of-detail
       * log2(exp2(bias) * rho) = bias + log2(rho).
       */
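      /* For example, a bias of +1 scales the derivatives by exp2(1) = 2,
       * raising the level-of-detail by exactly 1.
       */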
      nir_def *scale = nir_fexp2(b, nir_f2f32(b, bias_for_tex(b, tex)));
      nir_tex_src_type src[] = {nir_tex_src_ddx, nir_tex_src_ddy};

      for (unsigned s = 0; s < ARRAY_SIZE(src); ++s) {
         nir_def *orig = nir_steal_tex_src(tex, src[s]);
         assert(orig != NULL && "invalid");

         nir_def *scaled = nir_fmul(b, nir_f2f32(b, orig), scale);
         nir_tex_instr_add_src(tex, src[s], scaled);
      }

      return true;
   }

   case nir_texop_lod: {
      nir_tex_instr_add_src(tex, nir_tex_src_bias, bias_for_tex(b, tex));
      return true;
   }

   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_txs:
   case nir_texop_tg4:
   case nir_texop_texture_samples:
   case nir_texop_samples_identical:
   case nir_texop_query_levels:
      /* These operations do not use a sampler */
      return false;

   default:
      unreachable("Unhandled texture operation");
   }
}

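/* Image LOD sources must be 16-bit; narrow any wider LOD source. */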
static bool
legalize_image_lod(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
{
   nir_src *src;

#define CASE(op, idx)                                                          \
   case nir_intrinsic_##op:                                                    \
   case nir_intrinsic_bindless_##op:                                           \
      src = &intr->src[idx];                                                   \
      break;

   switch (intr->intrinsic) {
      CASE(image_load, 3)
      CASE(image_store, 4)
      CASE(image_size, 1)
   default:
      return false;
   }

#undef CASE

   if (src->ssa->bit_size == 16)
      return false;

   b->cursor = nir_before_instr(&intr->instr);
   nir_src_rewrite(src, nir_i2i16(b, src->ssa));
   return true;
}

static nir_def *
txs_for_image(nir_builder *b, nir_intrinsic_instr *intr,
              unsigned num_components, unsigned bit_size, bool query_samples)
{
   nir_tex_instr *tex = nir_tex_instr_create(b->shader, query_samples ? 1 : 2);
   tex->op = query_samples ? nir_texop_texture_samples : nir_texop_txs;
   tex->is_array = nir_intrinsic_image_array(intr);
   tex->dest_type = nir_type_uint32;
   tex->sampler_dim = nir_intrinsic_image_dim(intr);

   tex->src[0] =
      nir_tex_src_for_ssa(nir_tex_src_texture_handle, intr->src[0].ssa);

   if (!query_samples)
      tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_lod, intr->src[1].ssa);

   nir_def_init(&tex->instr, &tex->def, num_components, bit_size);
   nir_builder_instr_insert(b, &tex->instr);
   nir_def *res = &tex->def;

   /* Cube images are implemented as 2D arrays, so we need to divide here. */
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && res->num_components > 2 &&
       !query_samples) {
      nir_def *divided = nir_udiv_imm(b, nir_channel(b, res, 2), 6);
      res = nir_vector_insert_imm(b, res, divided, 2);
   }

   return res;
}

static nir_def *
image_texel_address(nir_builder *b, nir_intrinsic_instr *intr,
                    bool return_index)
{
   /* First, calculate the address of the PBE descriptor */
   nir_def *desc_address =
      nir_load_from_texture_handle_agx(b, intr->src[0].ssa);

   nir_def *coord = intr->src[1].ssa;
   enum pipe_format format = nir_intrinsic_format(intr);
   nir_def *blocksize_B = nir_imm_int(b, util_format_get_blocksize(format));

   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr);
   bool layered = nir_intrinsic_image_array(intr) ||
                  (dim == GLSL_SAMPLER_DIM_CUBE) ||
                  (dim == GLSL_SAMPLER_DIM_3D);

   /* The last 8 bytes of the 24-byte PBE descriptor point to the
    * software-defined atomic descriptor. Grab the address.
    */
   nir_def *meta_meta_ptr = nir_iadd_imm(b, desc_address, 16);
   nir_def *meta_ptr = nir_load_global_constant(b, meta_meta_ptr, 8, 1, 64);

   if (dim == GLSL_SAMPLER_DIM_BUF && return_index) {
      return nir_channel(b, coord, 0);
   } else if (dim == GLSL_SAMPLER_DIM_BUF) {
      return libagx_buffer_texel_address(b, meta_ptr, coord, blocksize_B);
   } else {
      return libagx_image_texel_address(
         b, meta_ptr, coord, nir_u2u32(b, intr->src[2].ssa), blocksize_B,
         nir_imm_bool(b, dim == GLSL_SAMPLER_DIM_MS), nir_imm_bool(b, layered),
         nir_imm_bool(b, return_index));
   }
}

static void
lower_buffer_image(nir_builder *b, nir_intrinsic_instr *intr)
{
   nir_def *coord_vector = intr->src[1].ssa;
   nir_def *coord = nir_channel(b, coord_vector, 0);

   /* Lower the buffer load/store to a 2D image load/store, matching the 2D
    * texture/PBE descriptor the driver supplies for buffer images.
    */
   nir_def *coord2d = coords_for_buffer_texture(b, coord);
   nir_src_rewrite(&intr->src[1], nir_pad_vector(b, coord2d, 4));
   nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
}

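/*
 * Lower a 1D image access to the 2D image the driver binds in its place:
 * (x) maps to (x, 0) and, for arrays, (x, layer) maps to (x, 0, layer).
 */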
static void
lower_1d_image(nir_builder *b, nir_intrinsic_instr *intr)
{
   nir_def *coord = intr->src[1].ssa;
   bool is_array = nir_intrinsic_image_array(intr);
   nir_def *zero = nir_imm_intN_t(b, 0, coord->bit_size);

   if (is_array) {
      assert(coord->num_components >= 2);
      coord =
         nir_vec3(b, nir_channel(b, coord, 0), zero, nir_channel(b, coord, 1));
   } else {
      assert(coord->num_components >= 1);
      coord = nir_vec2(b, coord, zero);
   }

   nir_src_rewrite(&intr->src[1], nir_pad_vector(b, coord, 4));
   nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
}

static bool
lower_images(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
{
   b->cursor = nir_before_instr(&intr->instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_store:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_bindless_image_store: {
      /* Legalize MSAA index */
      nir_src_rewrite(&intr->src[2], nir_u2u16(b, intr->src[2].ssa));

      switch (nir_intrinsic_image_dim(intr)) {
      case GLSL_SAMPLER_DIM_1D:
         lower_1d_image(b, intr);
         return true;

      case GLSL_SAMPLER_DIM_BUF:
         lower_buffer_image(b, intr);
         return true;

      default:
         return true;
      }
   }

   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_bindless_image_samples:
      nir_def_rewrite_uses(
         &intr->def,
         txs_for_image(
            b, intr, intr->def.num_components, intr->def.bit_size,
            intr->intrinsic == nir_intrinsic_bindless_image_samples));
      return true;

   case nir_intrinsic_bindless_image_texel_address:
      nir_def_rewrite_uses(&intr->def, image_texel_address(b, intr, false));
      return true;

   case nir_intrinsic_image_size:
   case nir_intrinsic_image_texel_address:
      unreachable("should've been lowered");

   default:
      return false;
   }
}

/*
 * Early texture lowering passes, called by the driver before lowering
 * descriptor bindings. That means these passes operate on texture derefs. The
 * purpose is to make descriptor crawls explicit in the NIR, so that the driver
 * can accurately lower descriptors after this pass but before calling the full
 * agx_nir_lower_texture.
 */
bool
agx_nir_lower_texture_early(nir_shader *s, bool support_lod_bias)
{
   bool progress = false;

   nir_lower_tex_options lower_tex_options = {
      .lower_txp = ~0,
      .lower_invalid_implicit_lod = true,
      .lower_tg4_offsets = true,
      .lower_index_to_offset = true,

      /* Unclear if/how mipmapped 1D textures work in the hardware. */
      .lower_1d = true,

      /* XXX: Metal seems to handle this just like 3D txd, so why doesn't it
       * work? TODO: Stop using this lowering.
       */
      .lower_txd_cube_map = true,
   };

   NIR_PASS(progress, s, nir_lower_tex, &lower_tex_options);

   /* Lower bias after nir_lower_tex (to get rid of txd) but before
    * lower_regular_texture (which will shuffle around the sources)
    */
   if (support_lod_bias) {
      NIR_PASS(progress, s, nir_shader_instructions_pass, lower_sampler_bias,
               nir_metadata_block_index | nir_metadata_dominance, NULL);
   }

   return progress;
}

bool
agx_nir_lower_texture(nir_shader *s)
{
   bool progress = false;

   nir_tex_src_type_constraints tex_constraints = {
      [nir_tex_src_lod] = {true, 16},
      [nir_tex_src_bias] = {true, 16},
      [nir_tex_src_ms_index] = {true, 16},
      [nir_tex_src_texture_offset] = {true, 16},
      [nir_tex_src_sampler_offset] = {true, 16},
   };

   /* Insert fences before lowering image atomics, since image atomics need
    * different fencing than other image operations.
    */
   NIR_PASS(progress, s, nir_shader_intrinsics_pass, fence_image,
            nir_metadata_block_index | nir_metadata_dominance, NULL);

   NIR_PASS(progress, s, nir_lower_image_atomics_to_global);

   NIR_PASS(progress, s, nir_shader_intrinsics_pass, legalize_image_lod,
            nir_metadata_block_index | nir_metadata_dominance, NULL);
   NIR_PASS(progress, s, nir_shader_intrinsics_pass, lower_images,
            nir_metadata_block_index | nir_metadata_dominance, NULL);
   NIR_PASS(progress, s, nir_legalize_16bit_sampler_srcs, tex_constraints);

   /* Fold constants after nir_legalize_16bit_sampler_srcs so we can detect 0
    * in lower_regular_texture. This is required for correctness.
    */
   NIR_PASS(progress, s, nir_opt_constant_folding);

   /* Lower texture sources after legalizing types (as the lowering depends on
    * 16-bit multisample indices) but before lowering queries (as the lowering
    * generates txs for array textures).
    */
   NIR_PASS(progress, s, nir_shader_instructions_pass, lower_regular_texture,
            nir_metadata_none, NULL);
   NIR_PASS(progress, s, nir_shader_instructions_pass, lower_tex_crawl,
            nir_metadata_block_index | nir_metadata_dominance, NULL);

   return progress;
}

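/*
 * Lower a store to a multisampled image into a store addressing the sample's
 * texel directly: compute the linear texel index of the addressed sample,
 * then store through the same 1024-wide 2D decomposition used for buffer
 * images.
 */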
static bool
lower_multisampled_store(nir_builder *b, nir_intrinsic_instr *intr,
                         UNUSED void *data)
{
   b->cursor = nir_before_instr(&intr->instr);

   if (intr->intrinsic != nir_intrinsic_bindless_image_store)
      return false;

   if (nir_intrinsic_image_dim(intr) != GLSL_SAMPLER_DIM_MS)
      return false;

   nir_def *index_px = nir_u2u32(b, image_texel_address(b, intr, true));
   nir_def *coord2d = coords_for_buffer_texture(b, index_px);

   nir_src_rewrite(&intr->src[1], nir_pad_vector(b, coord2d, 4));
   nir_src_rewrite(&intr->src[2], nir_imm_int(b, 0));
   nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
   nir_intrinsic_set_image_array(intr, false);
   return true;
}

bool
agx_nir_lower_multisampled_image_store(nir_shader *s)
{
   return nir_shader_intrinsics_pass(
      s, lower_multisampled_store,
      nir_metadata_block_index | nir_metadata_dominance, NULL);
}

/*
 * Given a non-bindless instruction, return whether agx_nir_lower_texture will
 * lower it to something involving a descriptor crawl. This requires the driver
 * to lower the instruction to bindless before calling agx_nir_lower_texture.
 * The implementation just enumerates the cases handled in this file.
 */
bool
agx_nir_needs_texture_crawl(nir_instr *instr)
{
   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      /* Queries and atomics always become a crawl */
      case nir_intrinsic_image_size:
      case nir_intrinsic_image_deref_size:
      case nir_intrinsic_image_samples:
      case nir_intrinsic_image_deref_samples:
      case nir_intrinsic_image_atomic:
      case nir_intrinsic_image_deref_atomic:
      case nir_intrinsic_image_atomic_swap:
      case nir_intrinsic_image_deref_atomic_swap:
         return true;

      /* Multisampled stores need a crawl, others do not */
      case nir_intrinsic_image_store:
      case nir_intrinsic_image_deref_store:
         return nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_MS;

      /* Loads do not need a crawl, even from buffers */
      default:
         return false;
      }
   } else if (instr->type == nir_instr_type_tex) {
      nir_tex_instr *tex = nir_instr_as_tex(instr);

      /* Array textures get clamped to their size via txs */
      if (tex->is_array && !(tex->backend_flags & AGX_TEXTURE_FLAG_NO_CLAMP))
         return true;

      switch (tex->op) {
      /* Queries always become a crawl */
      case nir_texop_txs:
      case nir_texop_texture_samples:
      case nir_texop_query_levels:
         return true;

      /* Buffer textures need their format read and txf needs its LOD clamped.
       * Buffer textures are only read through txf.
       */
      case nir_texop_txf:
      case nir_texop_txf_ms:
         return has_nonzero_lod(tex) ||
                tex->sampler_dim == GLSL_SAMPLER_DIM_BUF;

      default:
         return false;
      }
   }

   return false;
}