/*
 * Copyright 2022 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#include "agx_nir_lower_vbo.h"
#include "asahi/compiler/agx_internal_formats.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "util/bitset.h"
#include "util/u_math.h"
#include "shader_enums.h"

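/* Matches any packed 2.10.10.10 layout by channel shifts and sizes, rather
 * than by enumerating specific formats.
 */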
static bool
is_rgb10_a2(const struct util_format_description *desc)
{
   return desc->channel[0].shift == 0 && desc->channel[0].size == 10 &&
          desc->channel[1].shift == 10 && desc->channel[1].size == 10 &&
          desc->channel[2].shift == 20 && desc->channel[2].size == 10 &&
          desc->channel[3].shift == 30 && desc->channel[3].size == 2;
}

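/* Map a vertex format to the "interchange" format that the hardware fetch
 * actually uses, or PIPE_FORMAT_NONE if unsupported. For example,
 * R8G8B8A8_UNORM maps to R8_UNORM, R16G16_SNORM to R16_SNORM, and
 * R32G32B32_FLOAT to R32_UINT (a bit-exact copy, converted in the shader).
 */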
static enum pipe_format
agx_vbo_internal_format(enum pipe_format format)
{
   const struct util_format_description *desc = util_format_description(format);

   /* RGB10A2 formats are native for UNORM and unpacked otherwise */
   if (is_rgb10_a2(desc)) {
      if (desc->is_unorm)
         return PIPE_FORMAT_R10G10B10A2_UNORM;
      else
         return PIPE_FORMAT_R32_UINT;
   }

   /* R11G11B10F is native and special */
   if (format == PIPE_FORMAT_R11G11B10_FLOAT)
      return format;

   /* No other non-array formats handled */
   if (!desc->is_array)
      return PIPE_FORMAT_NONE;

   /* Otherwise look at one (any) channel */
   int idx = util_format_get_first_non_void_channel(format);
   if (idx < 0)
      return PIPE_FORMAT_NONE;

   /* We only handle RGB formats (we could do SRGB if we wanted though?) */
   if ((desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB) ||
       (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN))
      return PIPE_FORMAT_NONE;

   /* We have native 8-bit and 16-bit normalized formats */
   struct util_format_channel_description chan = desc->channel[idx];

   if (chan.normalized) {
      if (chan.size == 8)
         return desc->is_unorm ? PIPE_FORMAT_R8_UNORM : PIPE_FORMAT_R8_SNORM;
      else if (chan.size == 16)
         return desc->is_unorm ? PIPE_FORMAT_R16_UNORM : PIPE_FORMAT_R16_SNORM;
   }

   /* Otherwise map to the corresponding integer format */
   switch (chan.size) {
   case 32:
      return PIPE_FORMAT_R32_UINT;
   case 16:
      return PIPE_FORMAT_R16_UINT;
   case 8:
      return PIPE_FORMAT_R8_UINT;
   default:
      return PIPE_FORMAT_NONE;
   }
}

bool
agx_vbo_supports_format(enum pipe_format format)
{
   return agx_vbo_internal_format(format) != PIPE_FORMAT_NONE;
}

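/* Select one output channel from the fetched vector according to the format
 * swizzle. PIPE_SWIZZLE_1 must be typed: integer formats pad with 1, float
 * formats with 1.0.
 */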
static nir_def *
apply_swizzle_channel(nir_builder *b, nir_def *vec, unsigned swizzle,
                      bool is_int)
{
   switch (swizzle) {
   case PIPE_SWIZZLE_X:
      return nir_channel(b, vec, 0);
   case PIPE_SWIZZLE_Y:
      return nir_channel(b, vec, 1);
   case PIPE_SWIZZLE_Z:
      return nir_channel(b, vec, 2);
   case PIPE_SWIZZLE_W:
      return nir_channel(b, vec, 3);
   case PIPE_SWIZZLE_0:
      return nir_imm_intN_t(b, 0, vec->bit_size);
   case PIPE_SWIZZLE_1:
      return is_int ? nir_imm_intN_t(b, 1, vec->bit_size)
                    : nir_imm_floatN_t(b, 1.0, vec->bit_size);
   default:
      unreachable("Invalid swizzle channel");
   }
}

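/* Lower one load_input in a vertex shader to a load_constant_agx from the
 * backing vertex buffer, then convert and swizzle the result in the shader
 * to match the API-visible format.
 */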
static bool
pass(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   if (intr->intrinsic != nir_intrinsic_load_input)
      return false;

   struct agx_attribute *attribs = data;
   b->cursor = nir_before_instr(&intr->instr);

   nir_src *offset_src = nir_get_io_offset_src(intr);
   assert(nir_src_is_const(*offset_src) && "no attribute indirects");
   unsigned index = nir_intrinsic_base(intr) + nir_src_as_uint(*offset_src);

   struct agx_attribute attrib = attribs[index];
   uint32_t stride = attrib.stride;
   uint16_t offset = attrib.src_offset;

   const struct util_format_description *desc =
      util_format_description(attrib.format);
   int chan = util_format_get_first_non_void_channel(attrib.format);
   assert(chan >= 0);

   bool is_float = desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT;
   bool is_unsigned = desc->channel[chan].type == UTIL_FORMAT_TYPE_UNSIGNED;
   bool is_signed = desc->channel[chan].type == UTIL_FORMAT_TYPE_SIGNED;
   bool is_fixed = desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED;
   bool is_int = util_format_is_pure_integer(attrib.format);

   assert((is_float ^ is_unsigned ^ is_signed ^ is_fixed) && "Invalid format");

   enum pipe_format interchange_format = agx_vbo_internal_format(attrib.format);
   assert(interchange_format != PIPE_FORMAT_NONE);

   unsigned interchange_align = util_format_get_blocksize(interchange_format);
   unsigned interchange_comps = util_format_get_nr_components(attrib.format);

   /* In the hardware, uint formats zero-extend and float formats convert.
    * However, non-uint formats using a uint interchange format shouldn't be
    * zero extended.
    */
   unsigned interchange_register_size =
      util_format_is_pure_uint(interchange_format) &&
            !util_format_is_pure_uint(attrib.format)
         ? (interchange_align * 8)
         : intr->def.bit_size;

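   /* For example, R8G8B8A8_SSCALED interchanges as R8_UINT: loading at 8 bits
    * per register keeps the raw bytes unextended, so the sign-aware
    * conversion below sees the true values.
    */
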
   /* Non-UNORM R10G10B10A2 is loaded as a scalar and unpacked */
   if (interchange_format == PIPE_FORMAT_R32_UINT && !desc->is_array)
      interchange_comps = 1;

   /* Calculate the element to fetch the vertex from. Divide the instance ID
    * by the divisor for per-instance data. Divisor=0 specifies per-vertex
    * data.
    */
   nir_def *el;
   if (attrib.divisor) {
      el = nir_udiv_imm(b, nir_load_instance_id(b), attrib.divisor);
      el = nir_iadd(b, el, nir_load_base_instance(b));

      BITSET_SET(b->shader->info.system_values_read,
                 SYSTEM_VALUE_BASE_INSTANCE);
   } else {
      el = nir_load_vertex_id(b);
   }

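   /* e.g. with divisor = 4 and base_instance = 10, instances 0..3 fetch
    * element 10, instances 4..7 fetch element 11, and so on.
    */
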
   /* VBO bases are per-attribute, otherwise they're per-buffer. This allows
    * memory sinks to work properly with robustness, allows folding the
    * src_offset into the VBO base to save an add in the shader, and reduces
    * the size of the vertex fetch key. That last piece allows reusing a
    * linked VS with both separate and interleaved attributes.
    */
   nir_def *buf_handle = nir_imm_int(b, index);

   /* Robustness is handled at the ID level */
   nir_def *bounds = nir_load_attrib_clamp_agx(b, buf_handle);

   /* For now, robustness is always applied. This gives GL robustness
    * semantics. For robustBufferAccess2, we'll want to check for
    * out-of-bounds access (where el > bounds) and replace base with the
    * address of a zero sink. With soft fault and a large enough sink, we
    * don't need to clamp the index, allowing that robustness behaviour to be
    * implemented with 2 cmpsel before the load. That is faster than the 4
    * cmpsel required after the load, and it avoids waiting on the load,
    * which should help prolog performance.
    *
    * TODO: Plumb through soft fault information to skip this.
    *
    * TODO: Add a knob for robustBufferAccess2 semantics.
    */
   bool robust = true;
   if (robust) {
      el = nir_umin(b, el, bounds);
   }
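   /* Clamping with umin means an out-of-bounds el fetches the last in-bounds
    * element, i.e. the result still comes from within the buffer, as GL
    * robustness permits.
    */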

   nir_def *base = nir_load_vbo_base_agx(b, buf_handle);

   assert((stride % interchange_align) == 0 && "must be aligned");
   assert((offset % interchange_align) == 0 && "must be aligned");

   unsigned stride_el = stride / interchange_align;
   unsigned offset_el = offset / interchange_align;
   unsigned shift = 0;

   /* Try to use the small shift on the load itself when possible. This can
    * save an instruction. Shifts are only available for regular interchange
    * formats, i.e. the set of formats that support masking.
    */
   if (offset_el == 0 && (stride_el == 2 || stride_el == 4) &&
       agx_internal_format_supports_mask(
          (enum agx_internal_formats)interchange_format)) {

      shift = util_logbase2(stride_el);
      stride_el = 1;
   }
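   /* e.g. a 16-byte stride with a 4-byte interchange format: stride_el 4
    * becomes stride_el 1 with shift 2, folding the multiply into the load.
    */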

   nir_def *stride_offset_el =
      nir_iadd_imm(b, nir_imul_imm(b, el, stride_el), offset_el);

   /* Load the raw vector */
   nir_def *memory = nir_load_constant_agx(
      b, interchange_comps, interchange_register_size, base, stride_offset_el,
      .format = interchange_format, .base = shift);

   unsigned dest_size = intr->def.bit_size;

   /* Unpack but do not convert non-native non-array formats */
   if (is_rgb10_a2(desc) && interchange_format == PIPE_FORMAT_R32_UINT) {
      unsigned bits[] = {10, 10, 10, 2};

      if (is_signed)
         memory = nir_format_unpack_sint(b, memory, bits, 4);
      else
         memory = nir_format_unpack_uint(b, memory, bits, 4);
   }
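   /* The packed 2.10.10.10 dword is now a vec4 of sign- or zero-extended
    * integer channels, ready for any normalization below.
    */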

   if (desc->channel[chan].normalized) {
      /* 8/16-bit normalized formats are native, others are converted here */
      if (is_rgb10_a2(desc) && is_signed) {
         unsigned bits[] = {10, 10, 10, 2};
         memory = nir_format_snorm_to_float(b, memory, bits);
      } else if (desc->channel[chan].size == 32) {
         assert(desc->is_array && "no non-array 32-bit norm formats");
         unsigned bits[] = {32, 32, 32, 32};

         if (is_signed)
            memory = nir_format_snorm_to_float(b, memory, bits);
         else
            memory = nir_format_unorm_to_float(b, memory, bits);
      }
   } else if (desc->channel[chan].pure_integer) {
      /* Zero-extension is native; we may need to sign-extend */
      if (is_signed)
         memory = nir_i2iN(b, memory, dest_size);
   } else {
      if (is_unsigned)
         memory = nir_u2fN(b, memory, dest_size);
      else if (is_signed || is_fixed)
         memory = nir_i2fN(b, memory, dest_size);
      else
         memory = nir_f2fN(b, memory, dest_size);

      /* 16.16 fixed-point weirdo GL formats need to be scaled */
      if (is_fixed) {
         assert(desc->is_array && desc->channel[chan].size == 32);
         assert(dest_size == 32 && "overflow if smaller");
         memory = nir_fmul_imm(b, memory, 1.0 / 65536.0);
      }
   }
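   /* GL_FIXED example: raw 0x00018000 converts to 98304.0, then scaling by
    * 1/65536 yields 1.5, as expected for 16.16 fixed point.
    */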

   /* We now have a properly formatted vector of the components in memory.
    * Apply the format swizzle forwards to trim/pad/reorder as needed.
    */
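   /* e.g. PIPE_FORMAT_R8G8_UNORM read into a vec4 destination takes X and Y
    * from memory and fills Z and W with 0 and 1 via the format swizzle.
    */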
   nir_def *channels[4] = {NULL};

   for (unsigned i = 0; i < intr->num_components; ++i) {
      unsigned c = nir_intrinsic_component(intr) + i;
      channels[i] = apply_swizzle_channel(b, memory, desc->swizzle[c], is_int);
   }

   nir_def *logical = nir_vec(b, channels, intr->num_components);
   nir_def_rewrite_uses(&intr->def, logical);
   return true;
}

bool
agx_nir_lower_vbo(nir_shader *shader, struct agx_attribute *attribs)
{
   assert(shader->info.stage == MESA_SHADER_VERTEX);
   return nir_shader_intrinsics_pass(
      shader, pass, nir_metadata_block_index | nir_metadata_dominance, attribs);
}