/*
 * Copyright 2022 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#include <stdint.h>
#include "compiler/agx_internal_formats.h"
#include "compiler/glsl_types.h"
#include "util/format/u_format.h"
#include "util/macros.h"
#include "agx_nir_format_helpers.h"
#include "agx_pack.h"
#include "agx_tilebuffer.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_builder_opcodes.h"

#define AGX_NUM_TEXTURE_STATE_REGS 16
#define ALL_SAMPLES                0xFF

struct ctx {
   struct agx_tilebuffer_layout *tib;
   uint8_t *colormasks;     /* per-RT colour write masks, or NULL */
   bool *translucent;       /* set if a translucent pass type is required */
   unsigned bindless_base;  /* first texture/PBE index for spilled targets */
   bool any_memory_stores;  /* a spilled render target was written */
   bool layer_id_sr;        /* layer ID comes from sr2, not a flat varying */
   uint8_t outputs_written; /* mask of render targets written statically */
};

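/* Match only loads/stores of colour render targets (FRAG_RESULT_DATA0 and
 * up); depth/stencil and other non-colour outputs are not touched by this
 * pass.
 */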
static bool
tib_filter(const nir_instr *instr, UNUSED const void *_)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_store_output &&
       intr->intrinsic != nir_intrinsic_load_output)
      return false;

   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
   assert(sem.dual_source_blend_index == 0 && "dual source blending lowered");
   return (sem.location >= FRAG_RESULT_DATA0);
}

static void
store_tilebuffer(nir_builder *b, struct agx_tilebuffer_layout *tib,
                 enum pipe_format format, enum pipe_format logical_format,
                 unsigned rt, nir_def *value, unsigned write_mask)
{
   /* The hardware cannot extend for a 32-bit format. Extend ourselves. */
   if (format == PIPE_FORMAT_R32_UINT && value->bit_size == 16) {
      if (util_format_is_pure_sint(logical_format))
         value = nir_i2i32(b, value);
      else if (util_format_is_pure_uint(logical_format))
         value = nir_u2u32(b, value);
      else
         value = nir_f2f32(b, value);
   }

   /* Pure integer formats need to be clamped in software, at least in some
    * cases. We do so on store. Piglit gl-3.0-render-integer checks this, as
    * does KHR-GL33.packed_pixels.*.
    */
   const struct util_format_description *desc =
      util_format_description(logical_format);
   unsigned c = util_format_get_first_non_void_channel(logical_format);

   if (desc->channel[c].size <= 16 &&
       util_format_is_pure_integer(logical_format)) {

      unsigned bits[4] = {
         desc->channel[0].size,
         desc->channel[1].size,
         desc->channel[2].size,
         desc->channel[3].size,
      };

      if (util_format_is_pure_sint(logical_format))
         value = nir_format_clamp_sint(b, value, bits);
      else
         value = nir_format_clamp_uint(b, value, bits);

      value = nir_u2u16(b, value);
   }

   uint8_t offset_B = agx_tilebuffer_offset_B(tib, rt);
   nir_store_local_pixel_agx(b, value, nir_imm_intN_t(b, ALL_SAMPLES, 16),
                             .base = offset_B, .write_mask = write_mask,
                             .format = format);
}

static nir_def *
load_tilebuffer(nir_builder *b, struct agx_tilebuffer_layout *tib,
                uint8_t load_comps, uint8_t bit_size, unsigned rt,
                enum pipe_format format, enum pipe_format logical_format)
{
   unsigned comps = util_format_get_nr_components(logical_format);
   bool f16 = (format == PIPE_FORMAT_R16_FLOAT);

   /* Don't load with F16 */
   if (f16)
      format = PIPE_FORMAT_R16_UINT;

   uint8_t offset_B = agx_tilebuffer_offset_B(tib, rt);
   nir_def *res = nir_load_local_pixel_agx(
      b, MIN2(load_comps, comps), f16 ? 16 : bit_size,
      nir_imm_intN_t(b, ALL_SAMPLES, 16), .base = offset_B, .format = format);

   /* Extend floats */
   if (f16 && bit_size != 16) {
      assert(bit_size == 32);
      res = nir_f2f32(b, res);
   }

   res = nir_sign_extend_if_sint(b, res, logical_format);
   return nir_pad_vector(b, res, load_comps);
}

/*
 * As a simple implementation, we use image load/store instructions to access
 * spilled render targets. The driver will supply corresponding texture and PBE
 * descriptors for each render target, accessed bindlessly.
 *
 * Note that this lowering happens after driver bindings are lowered, so the
 * bindless handle is in the AGX-specific format.
 */
static nir_def *
handle_for_rt(nir_builder *b, unsigned base, unsigned rt, bool pbe,
              bool *bindless)
{
   unsigned index = base + (2 * rt) + (pbe ? 1 : 0);
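   /* e.g. with base 0, render target 1 uses texture state index 2 and PBE
    * index 3. Indices below AGX_NUM_TEXTURE_STATE_REGS can be referenced
    * directly; anything else must take the bindless path.
    */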
   *bindless = (*bindless) || (index >= AGX_NUM_TEXTURE_STATE_REGS);

   if (*bindless)
      return nir_load_texture_handle_agx(b, nir_imm_int(b, index));
   else
      return nir_imm_intN_t(b, index, 16);
}

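/* Spilled render targets are accessed as 2D array images, or as multisampled
 * 2D array images when there is more than one sample, in which case the
 * access uses the current sample ID and forces per-sample shading.
 */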
static enum glsl_sampler_dim
dim_for_rt(nir_builder *b, unsigned nr_samples, nir_def **sample)
{
   if (nr_samples == 1) {
      *sample = nir_imm_intN_t(b, 0, 16);
      return GLSL_SAMPLER_DIM_2D;
   } else {
      *sample = nir_load_sample_id(b);
      b->shader->info.fs.uses_sample_shading = true;
      return GLSL_SAMPLER_DIM_MS;
   }
}

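/* Build the (x, y, layer) image coordinate for the current pixel, padded to
 * the four components image instructions expect. The layer is inserted as the
 * third coordinate when a layer ID is provided.
 */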
static nir_def *
image_coords(nir_builder *b, nir_def *layer_id)
{
   nir_def *xy = nir_u2u32(b, nir_load_pixel_coord(b));
   nir_def *vec = nir_pad_vector(b, xy, 4);

   if (layer_id)
      vec = nir_vector_insert_imm(b, vec, layer_id, 2);

   return vec;
}

static void
store_memory(nir_builder *b, unsigned bindless_base, unsigned nr_samples,
             nir_def *layer_id, enum pipe_format format, unsigned rt,
             nir_def *value)
{
   /* Force bindless for multisampled image writes since they will be lowered
    * with a descriptor crawl later.
    */
   bool bindless = (nr_samples > 1);
   nir_def *image = handle_for_rt(b, bindless_base, rt, true, &bindless);
   nir_def *zero = nir_imm_intN_t(b, 0, 16);
   nir_def *lod = zero;

   nir_def *sample;
   enum glsl_sampler_dim dim = dim_for_rt(b, nr_samples, &sample);
   nir_def *coords = image_coords(b, layer_id);

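   /* As in load_memory, interlock so that accesses to the spilled render
    * target from earlier overlapping fragments have completed before we write.
    */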
   nir_begin_invocation_interlock(b);

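   /* For multisampled stores, only write the samples this fragment actually
    * covers; uncovered samples keep their existing contents.
    */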
   if (nr_samples > 1) {
      nir_def *coverage = nir_load_sample_mask(b);
      nir_def *covered = nir_ubitfield_extract(
         b, coverage, nir_u2u32(b, sample), nir_imm_int(b, 1));

      nir_push_if(b, nir_ine_imm(b, covered, 0));
   }

   if (bindless) {
      nir_bindless_image_store(b, image, coords, sample, value, lod,
                               .image_dim = dim, .image_array = true,
                               .format = format);
   } else {
      nir_image_store(b, image, coords, sample, value, lod, .image_dim = dim,
                      .image_array = true, .format = format);
   }

   if (nr_samples > 1)
      nir_pop_if(b, NULL);

   b->shader->info.writes_memory = true;
}

static nir_def *
load_memory(nir_builder *b, unsigned bindless_base, unsigned nr_samples,
            nir_def *layer_id, uint8_t comps, uint8_t bit_size, unsigned rt,
            enum pipe_format format)
{
   bool bindless = false;
   nir_def *image = handle_for_rt(b, bindless_base, rt, false, &bindless);
   nir_def *zero = nir_imm_intN_t(b, 0, 16);
   nir_def *lod = zero;

   nir_def *sample;
   enum glsl_sampler_dim dim = dim_for_rt(b, nr_samples, &sample);
   nir_def *coords = image_coords(b, layer_id);

   /* Ensure pixels below this one have written out their results */
   nir_begin_invocation_interlock(b);

   if (bindless) {
      return nir_bindless_image_load(b, comps, bit_size, image, coords, sample,
                                     lod, .image_dim = dim, .image_array = true,
                                     .format = format);
   } else {
      return nir_image_load(b, comps, bit_size, image, coords, sample, lod,
                            .image_dim = dim, .image_array = true,
                            .format = format);
   }
}

nir_def *
agx_internal_layer_id(nir_builder *b)
{
   /* In the background and end-of-tile programs, the layer ID is available as
    * sr2, the Z component of the workgroup index.
    */
   return nir_channel(b, nir_load_workgroup_id(b), 2);
}

static nir_def *
tib_layer_id(nir_builder *b, struct ctx *ctx)
{
   if (ctx->layer_id_sr) {
      return agx_internal_layer_id(b);
   } else {
      /* Otherwise, the layer ID is loaded as a flat varying. */
      b->shader->info.inputs_read |= VARYING_BIT_LAYER;

      return nir_load_input(b, 1, 32, nir_imm_int(b, 0),
                            .io_semantics.location = VARYING_SLOT_LAYER);
   }
}

static nir_def *
tib_impl(nir_builder *b, nir_instr *instr, void *data)
{
   struct ctx *ctx = data;
   struct agx_tilebuffer_layout *tib = ctx->tib;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
   unsigned rt = sem.location - FRAG_RESULT_DATA0;
   assert(rt < ARRAY_SIZE(tib->logical_format));

   enum pipe_format logical_format = tib->logical_format[rt];
   enum pipe_format format = agx_tilebuffer_physical_format(tib, rt);
   unsigned comps = util_format_get_nr_components(logical_format);

   if (intr->intrinsic == nir_intrinsic_store_output) {
      ctx->outputs_written |= BITFIELD_BIT(rt);

      /* Only write components that actually exist */
      uint16_t write_mask = (uint16_t)BITFIELD_MASK(comps);

      /* Delete stores to nonexistent render targets */
      if (logical_format == PIPE_FORMAT_NONE)
         return NIR_LOWER_INSTR_PROGRESS_REPLACE;

      /* Only write colours masked by the blend state */
      if (ctx->colormasks)
         write_mask &= ctx->colormasks[rt];

      /* Masked stores require a translucent pass type */
      if (write_mask != BITFIELD_MASK(comps)) {
         assert(ctx->translucent != NULL &&
                "colour masking requires translucency");

         assert(agx_tilebuffer_supports_mask(tib, rt));
         *(ctx->translucent) = true;
      }

      /* We ignore the NIR write mask for that determination, though, since
       * it's basically an optimization hint.
       */
      if (agx_tilebuffer_supports_mask(tib, rt))
         write_mask &= nir_intrinsic_write_mask(intr);

      /* Delete stores that are entirely masked out */
      if (!write_mask)
         return NIR_LOWER_INSTR_PROGRESS_REPLACE;

      nir_def *value = intr->src[0].ssa;

      /* Trim to format as required by hardware */
      value = nir_trim_vector(b, intr->src[0].ssa, comps);

      if (tib->spilled[rt]) {
         store_memory(b, ctx->bindless_base, tib->nr_samples,
                      tib_layer_id(b, ctx), logical_format, rt, value);
         ctx->any_memory_stores = true;
      } else {
         store_tilebuffer(b, tib, format, logical_format, rt, value,
                          write_mask);
      }

      return NIR_LOWER_INSTR_PROGRESS_REPLACE;
   } else {
      uint8_t bit_size = intr->def.bit_size;

      /* Loads from non-existent render targets are undefined in NIR but not
       * possible to encode in the hardware, so delete them.
       */
      if (logical_format == PIPE_FORMAT_NONE) {
         return nir_undef(b, intr->num_components, bit_size);
      } else if (tib->spilled[rt]) {
         *(ctx->translucent) = true;

         return load_memory(b, ctx->bindless_base, tib->nr_samples,
                            tib_layer_id(b, ctx), intr->num_components,
                            bit_size, rt, logical_format);
      } else {
         return load_tilebuffer(b, tib, intr->num_components, bit_size, rt,
                                format, logical_format);
      }
   }
}

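/*
 * Lower fragment shader render target accesses to tilebuffer loads/stores, or
 * to bindless image accesses for spilled render targets. colormasks is
 * optional; translucent, when non-NULL, is set if the pass requires a
 * translucent pass type; bindless_base must be provided if any render target
 * spills and is advanced past the descriptors this pass reserves.
 */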
bool
agx_nir_lower_tilebuffer(nir_shader *shader, struct agx_tilebuffer_layout *tib,
                         uint8_t *colormasks, unsigned *bindless_base,
                         bool *translucent, bool layer_id_sr)
{
   assert(shader->info.stage == MESA_SHADER_FRAGMENT);

   struct ctx ctx = {
      .tib = tib,
      .colormasks = colormasks,
      .translucent = translucent,
      .layer_id_sr = layer_id_sr,
   };

   /* Allocate 1 texture + 1 PBE descriptor per render target when spilling */
   if (agx_tilebuffer_spills(tib)) {
      assert(bindless_base != NULL && "must be specified if spilling");
      ctx.bindless_base = *bindless_base;
      *bindless_base += (AGX_MAX_RENDER_TARGETS * 2);
   }

   bool progress =
      nir_shader_lower_instructions(shader, tib_filter, tib_impl, &ctx);

   /* Flush any spilled render target writes at the end of the shader */
   if (ctx.any_memory_stores) {
      nir_function_impl *impl = nir_shader_get_entrypoint(shader);
      nir_builder b = nir_builder_at(nir_after_impl(impl));
      nir_fence_pbe_to_tex_pixel_agx(&b);
   }

   /* If there are any render targets bound to the framebuffer that aren't
    * statically written by the fragment shader, that acts as an implicit mask
    * and requires translucency.
    *
    * XXX: Could be optimized.
    */
   for (unsigned i = 0; i < ARRAY_SIZE(tib->logical_format); ++i) {
      bool exists = tib->logical_format[i] != PIPE_FORMAT_NONE;
      bool written = ctx.outputs_written & BITFIELD_BIT(i);

      if (translucent)
         *translucent |= (exists && !written);
   }

   return progress;
}