/*
 * Copyright © 2023 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include "nak_private.h"
#include "nir_builder.h"
#include "nir_format_convert.h"

#include "util/u_math.h"

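/*
 * Lowers NIR texture, texture-query, and bindless image instructions into
 * the form NAK expects: regular texture sources get packed into the two
 * backend source vectors (nir_tex_src_backend1/2) with per-instruction
 * details recorded in struct nak_nir_tex_flags, queries get turned into
 * hdr_dim_nv/tex_type_nv ops, and image loads and stores are shrunk to
 * match their format's component count.
 */
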
static enum glsl_sampler_dim
remap_sampler_dim(enum glsl_sampler_dim dim)
{
   switch (dim) {
   case GLSL_SAMPLER_DIM_SUBPASS: return GLSL_SAMPLER_DIM_2D;
   case GLSL_SAMPLER_DIM_SUBPASS_MS: return GLSL_SAMPLER_DIM_MS;
   default: return dim;
   }
}

static bool
lower_tex(nir_builder *b, nir_tex_instr *tex, const struct nak_compiler *nak)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_def *tex_h = NULL, *samp_h = NULL, *coord = NULL, *ms_idx = NULL;
   nir_def *offset = NULL, *lod = NULL, *bias = NULL, *min_lod = NULL;
   nir_def *ddx = NULL, *ddy = NULL, *z_cmpr = NULL;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_texture_handle: tex_h =     tex->src[i].src.ssa; break;
      case nir_tex_src_sampler_handle: samp_h =    tex->src[i].src.ssa; break;
      case nir_tex_src_coord:          coord =     tex->src[i].src.ssa; break;
      case nir_tex_src_ms_index:       ms_idx =    tex->src[i].src.ssa; break;
      case nir_tex_src_comparator:     z_cmpr =    tex->src[i].src.ssa; break;
      case nir_tex_src_offset:         offset =    tex->src[i].src.ssa; break;
      case nir_tex_src_lod:            lod =       tex->src[i].src.ssa; break;
      case nir_tex_src_bias:           bias =      tex->src[i].src.ssa; break;
      case nir_tex_src_min_lod:        min_lod =   tex->src[i].src.ssa; break;
      case nir_tex_src_ddx:            ddx =       tex->src[i].src.ssa; break;
      case nir_tex_src_ddy:            ddy =       tex->src[i].src.ssa; break;
      default:
         unreachable("Unsupported texture source");
      }
   }

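   /* Judging by the masks below, the combined handle takes its low 20 bits
    * (the texture index) from the texture handle and its top 12 bits (the
    * sampler index) from the sampler handle.
    */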
   /* Combine sampler and texture into one if needed */
   if (samp_h != NULL && samp_h != tex_h) {
      tex_h = nir_ior(b, nir_iand_imm(b, tex_h,  0x000fffff),
                         nir_iand_imm(b, samp_h, 0xfff00000));
   }
   tex_h = nir_u2u32(b, tex_h);

   /* Array index is treated separately, so pull it off if we have one. */
   nir_def *arr_idx = NULL;
   unsigned coord_components = tex->coord_components;
   if (coord && tex->is_array) {
      if (tex->op == nir_texop_lod) {
         /* The HW wants an array index. Use zero. */
         arr_idx = nir_imm_int(b, 0);
      } else {
         arr_idx = nir_channel(b, coord, --coord_components);

         /* Everything but texelFetch takes a float index
          *
          * TODO: Use F2I.U32.RNE
          */
         if (tex->op != nir_texop_txf && tex->op != nir_texop_txf_ms) {
            arr_idx = nir_fadd_imm(b, arr_idx, 0.5);

            /* TODO: Hardware seems to clamp negative values to zero for us
             * in f2u, but we still need this fmax for constant folding.
             */
            arr_idx = nir_fmax(b, arr_idx, nir_imm_float(b, 0.0));

            arr_idx = nir_f2u32(b, arr_idx);
         }

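         /* The hardware's array index field is presumably only 16 bits
          * wide, hence the clamp.
          */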
         arr_idx = nir_umin(b, arr_idx, nir_imm_int(b, UINT16_MAX));
      }
   }

   enum nak_nir_lod_mode lod_mode = NAK_NIR_LOD_MODE_AUTO;
   if (tex->op == nir_texop_txf_ms) {
      /* Multisampled textures do not have miplevels */
      lod_mode = NAK_NIR_LOD_MODE_ZERO;
      lod = NULL; /* We don't need this */
   } else if (lod != NULL) {
      nir_scalar lod_s = { .def = lod, .comp = 0 };
      if (nir_scalar_is_const(lod_s) &&
          nir_scalar_as_uint(lod_s) == 0) {
         lod_mode = NAK_NIR_LOD_MODE_ZERO;
         lod = NULL; /* We don't need this */
      } else {
         lod_mode = NAK_NIR_LOD_MODE_LOD;
      }
   } else if (bias != NULL) {
      lod_mode = NAK_NIR_LOD_MODE_BIAS;
      lod = bias;
   }

   if (min_lod != NULL) {
      switch (lod_mode) {
      case NAK_NIR_LOD_MODE_AUTO:
         lod_mode = NAK_NIR_LOD_MODE_CLAMP;
         break;
      case NAK_NIR_LOD_MODE_BIAS:
         lod_mode = NAK_NIR_LOD_MODE_BIAS_CLAMP;
         break;
      default:
         unreachable("Invalid min_lod");
      }
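      /* min_lod appears to be consumed as an unsigned 8.8 fixed-point
       * value, hence the scale by 256 and the clamp to at least 16
       * (1/16th of a LOD).
       */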
      min_lod = nir_f2u32(b, nir_fmax(b, nir_fmul_imm(b, min_lod, 256),
                                         nir_imm_float(b, 16)));
   }

   enum nak_nir_offset_mode offset_mode = NAK_NIR_OFFSET_MODE_NONE;
   if (offset != NULL) {
      /* For TG4, offsets are packed into a single 32-bit value with 8 bits
       * per component.  For all other texture instructions, offsets are
       * packed into a single 16-bit value with 4 bits per component.
       */
      static const unsigned bits4[] = { 4, 4, 4, 4 };
      static const unsigned bits8[] = { 8, 8, 8, 8 };
      const unsigned *bits = tex->op == nir_texop_tg4 ? bits8 : bits4;

      offset = nir_pad_vector_imm_int(b, offset, 0, 4);
      offset = nir_format_clamp_sint(b, offset, bits);
      offset = nir_format_pack_uint(b, offset, bits, 4);
      offset_mode = NAK_NIR_OFFSET_MODE_AOFFI;
   } else if (nir_tex_instr_has_explicit_tg4_offsets(tex)) {
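      /* Pack all four (x, y) gather offsets into one byte each, building a
       * 64-bit value that gets carried as a vec2 of 32-bit immediates.
       */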
      uint64_t off_u64 = 0;
      for (uint8_t i = 0; i < 8; ++i) {
         uint64_t off = (uint8_t)tex->tg4_offsets[i / 2][i % 2];
         off_u64 |= off << (i * 8);
      }
      offset = nir_imm_ivec2(b, off_u64, off_u64 >> 32);
      offset_mode = NAK_NIR_OFFSET_MODE_PER_PX;
   }

   nir_def *src0[4] = { NULL, };
   nir_def *src1[4] = { NULL, };
   unsigned src0_comps = 0, src1_comps = 0;

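/* Appends one component to src0 or src1, the two arrays that will become
 * the backend source vectors.
 */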
#define PUSH(a, x) do { \
   nir_def *val = (x); \
   assert(a##_comps < ARRAY_SIZE(a)); \
   a[a##_comps++] = val; \
} while(0)

   if (nak->sm >= 50) {
      if (tex->op == nir_texop_txd) {
         PUSH(src0, tex_h);

         for (uint32_t i = 0; i < coord_components; i++)
            PUSH(src0, nir_channel(b, coord, i));

         if (offset != NULL) {
            nir_def *arr_off = nir_ishl_imm(b, offset, 16);
            if (arr_idx)
               arr_off = nir_ior(b, arr_off, arr_idx);
            PUSH(src0, arr_off);
         } else if (arr_idx != NULL) {
            PUSH(src0, arr_idx);
         }

         assert(ddx->num_components == coord_components);
         for (uint32_t i = 0; i < coord_components; i++) {
            PUSH(src1, nir_channel(b, ddx, i));
            PUSH(src1, nir_channel(b, ddy, i));
         }
      } else {
         if (min_lod != NULL) {
            nir_def *arr_ml = nir_ishl_imm(b, min_lod, 16);
            if (arr_idx)
               arr_ml = nir_ior(b, arr_ml, arr_idx);
            PUSH(src0, arr_ml);
         } else if (arr_idx != NULL) {
            PUSH(src0, arr_idx);
         }

         for (uint32_t i = 0; i < coord_components; i++)
            PUSH(src0, nir_channel(b, coord, i));

         PUSH(src1, tex_h);
         if (ms_idx != NULL)
            PUSH(src1, ms_idx);
         if (lod != NULL)
            PUSH(src1, lod);
         if (offset_mode == NAK_NIR_OFFSET_MODE_AOFFI) {
            PUSH(src1, offset);
         } else if (offset_mode == NAK_NIR_OFFSET_MODE_PER_PX) {
            PUSH(src1, nir_channel(b, offset, 0));
            PUSH(src1, nir_channel(b, offset, 1));
         }
         if (z_cmpr != NULL)
            PUSH(src1, z_cmpr);
      }
   } else {
      unreachable("Unsupported shader model");
   }

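   /* Collapse everything into the two backend sources NAK consumes */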
   nir_def *vec_srcs[2] = {
      nir_vec(b, src0, src0_comps),
      nir_vec(b, src1, src1_comps),
   };

   tex->src[0].src_type = nir_tex_src_backend1;
   nir_src_rewrite(&tex->src[0].src, vec_srcs[0]);

   tex->src[1].src_type = nir_tex_src_backend2;
   nir_src_rewrite(&tex->src[1].src, vec_srcs[1]);

   /* Remove any extras */
   while (tex->num_srcs > 2)
      nir_tex_instr_remove_src(tex, tex->num_srcs - 1);

   tex->sampler_dim = remap_sampler_dim(tex->sampler_dim);

   struct nak_nir_tex_flags flags = {
      .lod_mode = lod_mode,
      .offset_mode = offset_mode,
      .has_z_cmpr = tex->is_shadow,
   };
   STATIC_ASSERT(sizeof(flags) == sizeof(tex->backend_flags));
   memcpy(&tex->backend_flags, &flags, sizeof(flags));

   if (tex->op == nir_texop_lod) {
      b->cursor = nir_after_instr(&tex->instr);

      /* The outputs are flipped compared to what NIR expects */
      nir_def *abs = nir_channel(b, &tex->def, 1);
      nir_def *rel = nir_channel(b, &tex->def, 0);

      /* The returned values are not quite what we want:
       * (a) convert from s16/u16 to f32
       * (b) multiply by 1/256
       *
       * TODO: We can make this cheaper once we have 16-bit in NAK
       */
      abs = nir_u2f32(b, nir_iand_imm(b, abs, 0xffff));
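      /* rel comes back as a signed 16-bit value; sign-extend it with a
       * shift-up/shift-down pair.
       */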
      nir_def *shift = nir_imm_int(b, 16);
      rel = nir_i2f32(b, nir_ishr(b, nir_ishl(b, rel, shift), shift));

      abs = nir_fmul_imm(b, abs, 1.0 / 256.0);
      rel = nir_fmul_imm(b, rel, 1.0 / 256.0);

      nir_def *res = nir_vec2(b, abs, rel);
      nir_def_rewrite_uses_after(&tex->def, res, res->parent_instr);
   }

   return true;
}

static bool
lower_txq(nir_builder *b, nir_tex_instr *tex, const struct nak_compiler *nak)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_def *tex_h = NULL, *lod = NULL;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_texture_handle: tex_h = tex->src[i].src.ssa; break;
      case nir_tex_src_sampler_handle: break; /* Ignored */
      case nir_tex_src_lod:            lod = tex->src[i].src.ssa; break;
      default:
         unreachable("Unsupported texture source");
      }
   }

   /* TODO: We should only support 32-bit handles */
   tex_h = nir_u2u32(b, tex_h);

   nir_def *txq_src;
   nir_component_mask_t mask;
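   /* hdr_dim_nv appears to return the image dimensions in components 0-2
    * and the mip level count in component 3, while tex_type_nv returns the
    * sample count in component 2, so each query picks a different mask.
    */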
   switch (tex->op) {
   case nir_texop_txs:
      tex->op = nir_texop_hdr_dim_nv;
      if (lod == NULL)
         lod = nir_imm_int(b, 0);
      txq_src = nir_vec2(b, tex_h, lod);
      mask = BITSET_MASK(tex->def.num_components);
      break;
   case nir_texop_query_levels:
      tex->op = nir_texop_hdr_dim_nv;
      txq_src = nir_vec2(b, tex_h, nir_imm_int(b, 0));
      mask = BITSET_BIT(3);
      break;
   case nir_texop_texture_samples:
      tex->op = nir_texop_tex_type_nv;
      txq_src = tex_h;
      mask = BITSET_BIT(2);
      break;
   default:
      unreachable("Invalid texture query op");
   }

   tex->src[0].src_type = nir_tex_src_backend1;
   nir_src_rewrite(&tex->src[0].src, txq_src);

   /* Remove any extras */
   while (tex->num_srcs > 1)
      nir_tex_instr_remove_src(tex, tex->num_srcs - 1);

   tex->sampler_dim = remap_sampler_dim(tex->sampler_dim);

   b->cursor = nir_after_instr(&tex->instr);

   /* Only pick off the selected components */
   tex->def.num_components = 4;
   nir_def *res = nir_channels(b, &tex->def, mask);
   nir_def_rewrite_uses_after(&tex->def, res, res->parent_instr);

   return true;
}

static bool
shrink_image_load(nir_builder *b, nir_intrinsic_instr *intrin,
                  const struct nak_compiler *nak)
{
   enum pipe_format format = nir_intrinsic_format(intrin);
   nir_component_mask_t comps_read = nir_def_components_read(&intrin->def);

   if (intrin->def.bit_size == 64) {
      assert(format == PIPE_FORMAT_NONE ||
             format == PIPE_FORMAT_R64_UINT ||
             format == PIPE_FORMAT_R64_SINT);

      b->cursor = nir_after_instr(&intrin->instr);

      nir_def *data_xy, *data_w;
      if (comps_read & BITFIELD_BIT(3)) {
         /* Thanks to descriptor indexing, we need to ensure that null
          * descriptor behavior works properly.  In particular, normal zero
          * reads will return (0, 0, 0, 1) whereas null descriptor reads need
          * to return (0, 0, 0, 0).  This means we can't blindly extend with
          * an alpha component of 1.  Instead, we need to trust the hardware
          * to extend the original RG32 with z = 0 and w = 1 and copy the w
          * value all the way out to the 64-bit w value.
          */
         assert(intrin->num_components == 4);
         assert(intrin->def.num_components == 4);
         intrin->def.bit_size = 32;

         data_xy = nir_channels(b, &intrin->def, 0x3);
         data_w = nir_channels(b, &intrin->def, 0x8);
      } else {
         intrin->num_components = 2;
         intrin->def.num_components = 2;
         intrin->def.bit_size = 32;

         data_xy = nir_channels(b, &intrin->def, 0x3);
         data_w = nir_imm_int(b, 0);
      }

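      /* Reassemble a 64-bit vec4: the real value in .x, zeros in .y and .z,
       * and the w value (the hardware-provided 0/1, or an explicit zero)
       * widened to 64 bits in .w.
       */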
      nir_def *data = nir_vec4(b, nir_pack_64_2x32(b, data_xy),
                               nir_imm_zero(b, 1, 64),
                               nir_imm_zero(b, 1, 64),
                               nir_u2u64(b, data_w));

      nir_def_rewrite_uses_after(&intrin->def, data, data->parent_instr);
      return true;
   }

   if (format == PIPE_FORMAT_NONE)
      return false;

   /* In order for null descriptors to work properly, we don't want to shrink
    * loads when the alpha channel is read even if we know the format has
    * fewer channels.
    */
   if (comps_read & BITFIELD_BIT(3))
      return false;

   const unsigned old_comps = intrin->def.num_components;

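   /* Round the format's component count up to a power of two, presumably
    * because image loads come back as 1, 2, or 4 components, never 3.
    */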
   unsigned new_comps = util_format_get_nr_components(format);
   new_comps = util_next_power_of_two(new_comps);
   if (comps_read <= BITFIELD_MASK(2))
      new_comps = 2;
   if (comps_read <= BITFIELD_MASK(1))
      new_comps = 1;

   if (new_comps >= intrin->num_components)
      return false;

   b->cursor = nir_after_instr(&intrin->instr);

   intrin->num_components = new_comps;
   intrin->def.num_components = new_comps;

   assert(new_comps <= 4);
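   /* Re-expand to the original width, filling in (0, 0, 0, 1) just as a
    * full-width load of a short format would have.
    */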
   nir_def *comps[4];
   for (unsigned c = 0; c < new_comps; c++)
      comps[c] = nir_channel(b, &intrin->def, c);
   for (unsigned c = new_comps; c < 3; c++)
      comps[c] = nir_imm_intN_t(b, 0, intrin->def.bit_size);
   if (new_comps < 4)
      comps[3] = nir_imm_intN_t(b, 1, intrin->def.bit_size);

   nir_def *data = nir_vec(b, comps, old_comps);
   nir_def_rewrite_uses_after(&intrin->def, data, data->parent_instr);
   return true;
}

static bool
shrink_image_store(nir_builder *b, nir_intrinsic_instr *intrin,
                   const struct nak_compiler *nak)
{
   enum pipe_format format = nir_intrinsic_format(intrin);
   nir_def *data = intrin->src[3].ssa;

   if (data->bit_size == 64) {
      assert(format == PIPE_FORMAT_NONE ||
             format == PIPE_FORMAT_R64_UINT ||
             format == PIPE_FORMAT_R64_SINT);

      b->cursor = nir_before_instr(&intrin->instr);

      /* For 64-bit image ops, we actually want a vec2 */
      nir_def *data_vec2 = nir_unpack_64_2x32(b, nir_channel(b, data, 0));
      nir_src_rewrite(&intrin->src[3], data_vec2);
      intrin->num_components = 2;
      return true;
   }

   if (format == PIPE_FORMAT_NONE)
      return false;

   unsigned new_comps = util_format_get_nr_components(format);
   new_comps = util_next_power_of_two(new_comps);
   if (new_comps >= intrin->num_components)
      return false;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_def *trimmed = nir_trim_vector(b, data, new_comps);
   nir_src_rewrite(&intrin->src[3], trimmed);
   intrin->num_components = new_comps;
   return true;
}

static bool
lower_image_txq(nir_builder *b, nir_intrinsic_instr *intrin,
                const struct nak_compiler *nak)
{
   b->cursor = nir_instr_remove(&intrin->instr);

   /* TODO: We should only support 32-bit handles */
   nir_def *img_h = nir_u2u32(b, intrin->src[0].ssa);

   nir_tex_instr *txq = nir_tex_instr_create(b->shader, 1);
   txq->sampler_dim = remap_sampler_dim(nir_intrinsic_image_dim(intrin));
   txq->is_array = nir_intrinsic_image_array(intrin);
   txq->dest_type = nir_type_int32;

   nir_component_mask_t mask;
   switch (intrin->intrinsic) {
   case nir_intrinsic_bindless_image_size: {
      nir_def *lod = intrin->src[1].ssa;

      txq->op = nir_texop_hdr_dim_nv;
      txq->src[0] = (nir_tex_src) {
         .src_type = nir_tex_src_backend1,
         .src = nir_src_for_ssa(nir_vec2(b, img_h, lod)),
      };
      mask = BITSET_MASK(intrin->def.num_components);
      break;
   }

   case nir_intrinsic_bindless_image_samples:
      txq->op = nir_texop_tex_type_nv;
      txq->src[0] = (nir_tex_src) {
         .src_type = nir_tex_src_backend1,
         .src = nir_src_for_ssa(img_h),
      };
      mask = BITSET_BIT(2);
      break;

   default:
      unreachable("Invalid image query op");
   }

   nir_def_init(&txq->instr, &txq->def, 4, 32);
   nir_builder_instr_insert(b, &txq->instr);

   /* Only pick off the selected components */
   nir_def *res = nir_channels(b, &txq->def, mask);

   nir_def_rewrite_uses(&intrin->def, res);

   return true;
}

static bool
lower_tex_instr(nir_builder *b, nir_instr *instr, void *_data)
{
   const struct nak_compiler *nak = _data;

   switch (instr->type) {
   case nir_instr_type_tex: {
      nir_tex_instr *tex = nir_instr_as_tex(instr);
      switch (tex->op) {
      case nir_texop_tex:
      case nir_texop_txb:
      case nir_texop_txl:
      case nir_texop_txd:
      case nir_texop_txf:
      case nir_texop_txf_ms:
      case nir_texop_tg4:
      case nir_texop_lod:
         return lower_tex(b, tex, nak);
      case nir_texop_txs:
      case nir_texop_query_levels:
      case nir_texop_texture_samples:
         return lower_txq(b, tex, nak);
      default:
         unreachable("Unsupported texture instruction");
      }
   }
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_bindless_image_load:
         return shrink_image_load(b, intrin, nak);
      case nir_intrinsic_bindless_image_store:
         return shrink_image_store(b, intrin, nak);
      case nir_intrinsic_bindless_image_size:
      case nir_intrinsic_bindless_image_samples:
         return lower_image_txq(b, intrin, nak);
      default:
         return false;
      }
   }
   default:
      return false;
   }
}

bool
nak_nir_lower_tex(nir_shader *nir, const struct nak_compiler *nak)
{
   return nir_shader_instructions_pass(nir, lower_tex_instr,
                                       nir_metadata_block_index |
                                       nir_metadata_dominance,
                                       (void *)nak);
}