/*
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2024 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#include "gallium/include/pipe/p_defines.h"
#include "util/format/u_formats.h"
#include "agx_abi.h"
#include "agx_linker.h"
#include "agx_nir.h"
#include "agx_nir_lower_gs.h"
#include "agx_nir_lower_vbo.h"
#include "agx_pack.h"
#include "agx_tilebuffer.h"
#include "libagx.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_builder_opcodes.h"
#include "nir_lower_blend.h"
#include "shader_enums.h"

/*
 * Insert code into a fragment shader to lower polygon stipple. The stipple is
 * passed in a sideband, rather than requiring a texture binding. This is
 * simpler for drivers to integrate and might be more efficient.
 */
static bool
agx_nir_lower_poly_stipple(nir_shader *s)
{
   assert(s->info.stage == MESA_SHADER_FRAGMENT);

   /* Insert at the beginning for performance. */
   nir_builder b_ =
      nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(s)));
   nir_builder *b = &b_;

   /* The stipple coordinate is defined as the window coordinate mod 32. It's
    * reversed along the X-axis to simplify the driver, hence the NOT.
    */
   nir_def *raw = nir_u2u32(b, nir_load_pixel_coord(b));
   nir_def *coord = nir_umod_imm(
      b,
      nir_vec2(b, nir_inot(b, nir_channel(b, raw, 0)), nir_channel(b, raw, 1)),
      32);

   /* Extract the column from the packed bitfield */
   nir_def *pattern = nir_load_polygon_stipple_agx(b, nir_channel(b, coord, 1));
   nir_def *bit = nir_ubitfield_extract(b, pattern, nir_channel(b, coord, 0),
                                        nir_imm_int(b, 1));

   /* Discard fragments where the pattern is 0 */
   nir_demote_if(b, nir_ieq_imm(b, bit, 0));
   s->info.fs.uses_discard = true;

   nir_metadata_preserve(b->impl, nir_metadata_control_flow);
   return true;
}

static bool
lower_vbo(nir_shader *s, const struct agx_velem_key *key,
          const struct agx_robustness rs)
{
   struct agx_attribute out[AGX_MAX_VBUFS];

   for (unsigned i = 0; i < AGX_MAX_VBUFS; ++i) {
      out[i] = (struct agx_attribute){
         .divisor = key[i].divisor,
         .stride = key[i].stride,
         .format = key[i].format,
         .instanced = key[i].instanced,
      };
   }

   return agx_nir_lower_vbo(s, out, rs);
}

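/*
 * Map sysvals used in a vertex shader part onto the uniform layout shared with
 * the driver. The returned index is a preamble (uniform) slot; with N vertex
 * attributes, the offsets below lay out the per-attribute VBO base pointers
 * first, then the per-attribute robustness clamps, then first vertex, base
 * instance, and the input assembly state buffer. These constants must stay in
 * sync with the driver code that uploads the corresponding uniforms.
 */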
static int
map_vs_part_uniform(nir_intrinsic_instr *intr, unsigned nr_attribs)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_load_vbo_base_agx:
      return 4 * nir_src_as_uint(intr->src[0]);
   case nir_intrinsic_load_attrib_clamp_agx:
      return (4 * nr_attribs) + (2 * nir_src_as_uint(intr->src[0]));
   case nir_intrinsic_load_first_vertex:
      return (6 * nr_attribs);
   case nir_intrinsic_load_base_instance:
      return (6 * nr_attribs) + 2;
   case nir_intrinsic_load_input_assembly_buffer_agx:
      return (6 * nr_attribs) + 8;
   default:
      return -1;
   }
}

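/*
 * Fragment shader parts only need the blend constant from the driver. Each
 * 32-bit channel spans two uniform slots, hence R/G/B/A at 4, 6, 8 and 10.
 */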
static int
map_fs_part_uniform(nir_intrinsic_instr *intr)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_load_blend_const_color_r_float:
      return 4;
   case nir_intrinsic_load_blend_const_color_g_float:
      return 6;
   case nir_intrinsic_load_blend_const_color_b_float:
      return 8;
   case nir_intrinsic_load_blend_const_color_a_float:
      return 10;
   default:
      return -1;
   }
}

static bool
lower_non_monolithic_uniforms(nir_builder *b, nir_intrinsic_instr *intr,
                              void *data)
{
   int unif;
   if (b->shader->info.stage == MESA_SHADER_VERTEX) {
      unsigned *nr_attribs = data;
      unif = map_vs_part_uniform(intr, *nr_attribs);
   } else {
      unif = map_fs_part_uniform(intr);
   }

   if (unif >= 0) {
      b->cursor = nir_instr_remove(&intr->instr);
      nir_def *load = nir_load_preamble(b, 1, intr->def.bit_size, .base = unif);
      nir_def_rewrite_uses(&intr->def, load);
      return true;
   } else if (intr->intrinsic == nir_intrinsic_load_texture_handle_agx) {
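      /* Texture handles become a (base, byte offset) pair. Shader parts
       * always use base 0, with the offset computed from the texture index
       * and the descriptor size.
       */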
      b->cursor = nir_instr_remove(&intr->instr);
      nir_def *offs =
         nir_imul_imm(b, nir_u2u32(b, intr->src[0].ssa), AGX_TEXTURE_LENGTH);
      nir_def_rewrite_uses(&intr->def, nir_vec2(b, nir_imm_int(b, 0), offs));
      return true;
   } else {
      return false;
   }
}

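/*
 * Adjacency primitives are emulated: only the non-adjacent vertices are
 * shaded, so remap the linear vertex ID to skip the adjacent ones (e.g.
 * 0, 2, 4, 6, ... for GL_TRIANGLES_ADJACENCY) before the software index fetch.
 */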
static bool
lower_adjacency(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   const struct agx_vs_prolog_key *key = data;
   b->cursor = nir_before_instr(&intr->instr);

   if (intr->intrinsic != nir_intrinsic_load_vertex_id)
      return false;

   nir_def *id = nir_load_vertex_id(b);

   if (key->adjacency == MESA_PRIM_LINES_ADJACENCY) {
      id = libagx_map_to_line_adj(b, id);
   } else if (key->adjacency == MESA_PRIM_TRIANGLE_STRIP_ADJACENCY) {
      id = libagx_map_to_tri_strip_adj(b, id);
   } else if (key->adjacency == MESA_PRIM_LINE_STRIP_ADJACENCY) {
      id = libagx_map_to_line_strip_adj(b, id);
   } else if (key->adjacency == MESA_PRIM_TRIANGLES_ADJACENCY) {
      /* Sequence (0, 2, 4), (6, 8, 10), ... */
      id = nir_imul_imm(b, id, 2);
   } else {
      unreachable("unknown");
   }

   id = agx_nir_load_vertex_id(b, id, key->sw_index_size_B);

   nir_def_replace(&intr->def, id);
   return true;
}

void
agx_nir_vs_prolog(nir_builder *b, const void *key_)
{
   const struct agx_vs_prolog_key *key = key_;
   b->shader->info.stage = MESA_SHADER_VERTEX;
   b->shader->info.name = "VS prolog";

   /* First, construct a passthrough shader reading each attribute and exporting
    * the value. We also need to export vertex/instance ID in their usual regs.
    */
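   /* component_mask has one bit per (attribute, component) pair, indexed as
    * (4 * attribute) + component. Load each attribute's vec4 once and export
    * the individual components that the API shader reads.
    */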
   unsigned i = 0;
   nir_def *vec = NULL;
   unsigned vec_idx = ~0;
   BITSET_FOREACH_SET(i, key->component_mask, AGX_MAX_ATTRIBS * 4) {
      unsigned a = i / 4;
      unsigned c = i % 4;

      if (vec_idx != a) {
         vec = nir_load_input(b, 4, 32, nir_imm_int(b, 0), .base = a);
         vec_idx = a;
      }

      nir_export_agx(b, nir_channel(b, vec, c), .base = AGX_ABI_VIN_ATTRIB(i));
   }

   nir_export_agx(b, nir_load_vertex_id(b), .base = AGX_ABI_VIN_VERTEX_ID);
   nir_export_agx(b, nir_load_instance_id(b), .base = AGX_ABI_VIN_INSTANCE_ID);

   /* Now lower the resulting program using the key */
   lower_vbo(b->shader, key->attribs, key->robustness);

   /* Clean up redundant vertex ID loads */
   if (!key->hw || key->adjacency) {
      NIR_PASS(_, b->shader, nir_opt_cse);
      NIR_PASS(_, b->shader, nir_opt_dce);
   }

   if (!key->hw) {
      agx_nir_lower_sw_vs(b->shader, key->sw_index_size_B);
   } else if (key->adjacency) {
      nir_shader_intrinsics_pass(b->shader, lower_adjacency,
                                 nir_metadata_control_flow, (void *)key);
   }

   /* Finally, lower uniforms according to our ABI */
   unsigned nr = DIV_ROUND_UP(BITSET_LAST_BIT(key->component_mask), 4);
   nir_shader_intrinsics_pass(b->shader, lower_non_monolithic_uniforms,
                              nir_metadata_control_flow, &nr);
   b->shader->info.io_lowered = true;
}

static bool
lower_input_to_prolog(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   if (intr->intrinsic != nir_intrinsic_load_input)
      return false;

   unsigned idx = nir_src_as_uint(intr->src[0]) + nir_intrinsic_base(intr);
   unsigned comp = nir_intrinsic_component(intr);

   assert(intr->def.bit_size == 32 && "todo: push conversions up?");
   unsigned base = 4 * idx + comp;

   b->cursor = nir_before_instr(&intr->instr);
   nir_def *val =
      nir_load_exported_agx(b, intr->def.num_components, intr->def.bit_size,
                            .base = AGX_ABI_VIN_ATTRIB(base));

   BITSET_WORD *comps_read = data;
   nir_component_mask_t mask = nir_def_components_read(&intr->def);

   u_foreach_bit(c, mask) {
      BITSET_SET(comps_read, base + c);
   }

   nir_def_replace(&intr->def, val);
   return true;
}

bool
agx_nir_lower_vs_input_to_prolog(nir_shader *s,
                                 BITSET_WORD *attrib_components_read)
{
   return nir_shader_intrinsics_pass(s, lower_input_to_prolog,
                                     nir_metadata_control_flow,
                                     attrib_components_read);
}

static bool
lower_active_samples_to_register(nir_builder *b, nir_intrinsic_instr *intr,
                                 void *data)
{
   if (intr->intrinsic != nir_intrinsic_load_active_samples_agx)
      return false;

   b->cursor = nir_before_instr(&intr->instr);
   nir_def *id =
      nir_load_exported_agx(b, 1, 16, .base = AGX_ABI_FIN_SAMPLE_MASK);

   nir_def_replace(&intr->def, id);
   return true;
}

static bool
lower_tests_zs_intr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   bool *value = data;
   if (intr->intrinsic != nir_intrinsic_load_shader_part_tests_zs_agx)
      return false;

   b->cursor = nir_instr_remove(&intr->instr);
   nir_def_rewrite_uses(&intr->def, nir_imm_intN_t(b, *value ? 0xFF : 0, 16));
   return true;
}

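/*
 * load_shader_part_tests_zs_agx tells a shader part whether it is expected to
 * run the depth/stencil tests itself. Fold it to a constant: all samples
 * (0xFF) or none. This only matters for shaders that can discard, so skip the
 * pass otherwise.
 */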
static bool
lower_tests_zs(nir_shader *s, bool value)
{
   if (!s->info.fs.uses_discard)
      return false;

   return nir_shader_intrinsics_pass(s, lower_tests_zs_intr,
                                     nir_metadata_control_flow, &value);
}

static inline bool
blend_uses_2src(struct agx_blend_rt_key rt)
{
   enum pipe_blendfactor factors[] = {
      rt.rgb_src_factor,
      rt.rgb_dst_factor,
      rt.alpha_src_factor,
      rt.alpha_dst_factor,
   };

   for (unsigned i = 0; i < ARRAY_SIZE(factors); ++i) {
      switch (factors[i]) {
      case PIPE_BLENDFACTOR_SRC1_COLOR:
      case PIPE_BLENDFACTOR_SRC1_ALPHA:
      case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
      case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
         return true;
      default:
         break;
      }
   }

   return false;
}

static void
copy_colour(nir_builder *b, const struct agx_fs_epilog_key *key,
            unsigned out_rt, unsigned in_loc, bool dual_src)
{
   unsigned size = (key->link.size_32 & BITFIELD_BIT(in_loc)) ? 32 : 16;

   nir_def *value =
      nir_load_exported_agx(b, 4, size, .base = AGX_ABI_FOUT_COLOUR(in_loc));

   if (key->link.loc0_w_1 && in_loc == 0) {
      value =
         nir_vector_insert_imm(b, value, nir_imm_floatN_t(b, 1.0, size), 3);
   }

   nir_store_output(b, value, nir_imm_int(b, 0),
                    .io_semantics.location = FRAG_RESULT_DATA0 + out_rt,
                    .io_semantics.dual_source_blend_index = dual_src,
                    .src_type = nir_type_float | size);
}

void
agx_nir_fs_epilog(nir_builder *b, const void *key_)
{
   const struct agx_fs_epilog_key *key = key_;
   b->shader->info.stage = MESA_SHADER_FRAGMENT;
   b->shader->info.name = "FS epilog";

   /* First, construct a passthrough shader reading each colour and outputting
    * the value.
    */
   for (unsigned rt = 0; rt < ARRAY_SIZE(key->remap); ++rt) {
      int location = key->remap[rt];

      /* Negative remaps indicate the attachment isn't written. */
      if (location >= 0 && key->link.loc_written & BITFIELD_BIT(location)) {
         copy_colour(b, key, rt, location, false);

         /* If this render target uses dual source blending, also copy the dual
          * source colour. While the copy_colour above is needed even for
          * missing attachments to handle alpha-to-coverage, this copy is only
          * for blending so should be suppressed for missing attachments to keep
          * the assert from blowing up on OpenGL.
          */
         if (blend_uses_2src(key->blend.rt[rt]) &&
             key->rt_formats[rt] != PIPE_FORMAT_NONE) {

            assert(location == 0);
            copy_colour(b, key, rt, 1, true);
         }
      }
   }

   /* Grab registers early, this has to happen in the first block. */
   nir_def *sample_id = NULL, *write_samples = NULL;
   if (key->link.sample_shading) {
      sample_id =
         nir_load_exported_agx(b, 1, 16, .base = AGX_ABI_FOUT_SAMPLE_MASK);
   }

   if (key->link.sample_mask_after_force_early) {
      write_samples =
         nir_load_exported_agx(b, 1, 16, .base = AGX_ABI_FOUT_WRITE_SAMPLES);
   }

   /* Now lower the resulting program using the key */
   struct agx_tilebuffer_layout tib = agx_build_tilebuffer_layout(
      key->rt_formats, ARRAY_SIZE(key->rt_formats), key->nr_samples, true);

   if (key->force_small_tile)
      tib.tile_size = (struct agx_tile_size){16, 16};

   bool force_translucent = false;
   nir_lower_blend_options opts = {
      .scalar_blend_const = true,
      .logicop_enable = key->blend.logicop_func != PIPE_LOGICOP_COPY,
      .logicop_func = key->blend.logicop_func,
   };

   static_assert(ARRAY_SIZE(opts.format) == 8, "max RTs out of sync");

   for (unsigned i = 0; i < 8; ++i) {
      opts.format[i] = key->rt_formats[i];
      opts.rt[i] = (nir_lower_blend_rt){
         .rgb.src_factor = key->blend.rt[i].rgb_src_factor,
         .rgb.dst_factor = key->blend.rt[i].rgb_dst_factor,
         .rgb.func = key->blend.rt[i].rgb_func,

         .alpha.src_factor = key->blend.rt[i].alpha_src_factor,
         .alpha.dst_factor = key->blend.rt[i].alpha_dst_factor,
         .alpha.func = key->blend.rt[i].alpha_func,

         .colormask = key->blend.rt[i].colormask,
      };
   }

   /* It's more efficient to use masked stores (with
    * agx_nir_lower_tilebuffer) than to emulate colour masking with
    * nir_lower_blend.
    */
   uint8_t colormasks[8] = {0};

   for (unsigned i = 0; i < 8; ++i) {
      if (key->rt_formats[i] == PIPE_FORMAT_NONE)
         continue;

      /* TODO: Flakes some dEQPs, seems to invoke UB. Revisit later.
       * dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.77
       * dEQP-GLES2.functional.fragment_ops.interaction.basic_shader.98
       */
      if (0 /* agx_tilebuffer_supports_mask(&tib, i) */) {
         colormasks[i] = key->blend.rt[i].colormask;
         opts.rt[i].colormask = (uint8_t)BITFIELD_MASK(4);
      } else {
         colormasks[i] = (uint8_t)BITFIELD_MASK(4);
      }

      /* If not all bound RTs are fully written to, we need to force
       * translucent pass type. agx_nir_lower_tilebuffer will take
       * care of this for its own colormasks input.
       */
      unsigned comps = util_format_get_nr_components(key->rt_formats[i]);
      if ((opts.rt[i].colormask & BITFIELD_MASK(comps)) !=
          BITFIELD_MASK(comps)) {
         force_translucent = true;
      }
   }

   /* Alpha-to-coverage must be lowered before alpha-to-one */
   if (key->blend.alpha_to_coverage)
      NIR_PASS(_, b->shader, agx_nir_lower_alpha_to_coverage, tib.nr_samples);

   /* Depth/stencil writes must be deferred until after all discards,
    * particularly alpha-to-coverage.
    */
   if (key->link.write_z || key->link.write_s) {
      nir_store_zs_agx(
         b, nir_imm_intN_t(b, 0xFF, 16),
         nir_load_exported_agx(b, 1, 32, .base = AGX_ABI_FOUT_Z),
         nir_load_exported_agx(b, 1, 16, .base = AGX_ABI_FOUT_S),
         .base = (key->link.write_z ? 1 : 0) | (key->link.write_s ? 2 : 0));

      if (key->link.write_z)
         b->shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_DEPTH);

      if (key->link.write_s)
         b->shader->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_STENCIL);
   }

   /* Alpha-to-one must be lowered before blending */
   if (key->blend.alpha_to_one)
      NIR_PASS(_, b->shader, agx_nir_lower_alpha_to_one);

   NIR_PASS(_, b->shader, nir_lower_blend, &opts);

   unsigned rt_spill = key->link.rt_spill_base;
   NIR_PASS(_, b->shader, agx_nir_lower_tilebuffer, &tib, colormasks, &rt_spill,
            write_samples, &force_translucent);
   NIR_PASS(_, b->shader, agx_nir_lower_texture);
   NIR_PASS(_, b->shader, agx_nir_lower_multisampled_image_store);

   /* If the API shader runs once per sample, then the epilog runs once per
    * sample as well, so we need to lower our code to run for a single sample.
    *
    * If the API shader runs once per pixel, then the epilog runs once per
    * pixel. So we run through the monolithic MSAA lowering, which wraps the
    * epilog in the sample loop if needed. This localizes sample shading to the
    * epilog when the API shader does not use sample shading but blending
    * requires per-sample execution.
    */
   if (key->link.sample_shading) {
      /* Lower the resulting discards. Done in agx_nir_lower_monolithic_msaa for
       * the pixel shaded path. Must be done before agx_nir_lower_to_per_sample
       * to avoid duplicating tests.
       */
      if (key->blend.alpha_to_coverage) {
         NIR_PASS(_, b->shader, agx_nir_lower_sample_mask);
      }

      NIR_PASS(_, b->shader, agx_nir_lower_to_per_sample);
      NIR_PASS(_, b->shader, agx_nir_lower_fs_active_samples_to_register);

      /* Ensure the sample ID is preserved in register. We do this late since it
       * has to go in the last block, and the above passes might add control
       * flow when lowering.
       */
      b->cursor = nir_after_impl(b->impl);
      nir_export_agx(b, sample_id, .base = AGX_ABI_FIN_SAMPLE_MASK);
   } else {
      NIR_PASS(_, b->shader, agx_nir_lower_monolithic_msaa, key->nr_samples);
   }

   /* Finally, lower uniforms according to our ABI */
   nir_shader_intrinsics_pass(b->shader, lower_non_monolithic_uniforms,
                              nir_metadata_control_flow, NULL);

   /* There is no shader part after the epilog, so we're always responsible for
    * running our own tests, unless the fragment shader forced early tests.
    */
   NIR_PASS(_, b->shader, lower_tests_zs, !key->link.already_ran_zs);

   b->shader->info.io_lowered = true;
   b->shader->info.fs.uses_fbfetch_output |= force_translucent;
   b->shader->info.fs.uses_sample_shading = key->link.sample_shading;
}

struct lower_epilog_ctx {
   struct agx_fs_epilog_link_info *info;
   nir_variable *masked_samples;
};

static bool
lower_output_to_epilog(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   struct lower_epilog_ctx *ctx = data;
   struct agx_fs_epilog_link_info *info = ctx->info;

   if (intr->intrinsic == nir_intrinsic_store_zs_agx) {
      assert(nir_src_as_uint(intr->src[0]) == 0xff && "msaa not yet lowered");
      b->cursor = nir_instr_remove(&intr->instr);

      unsigned base = nir_intrinsic_base(intr);
      info->write_z = !!(base & 1);
      info->write_s = !!(base & 2);

      if (info->write_z)
         nir_export_agx(b, intr->src[1].ssa, .base = AGX_ABI_FOUT_Z);

      if (info->write_s)
         nir_export_agx(b, intr->src[2].ssa, .base = AGX_ABI_FOUT_S);

      return true;
   }

   if (intr->intrinsic == nir_intrinsic_discard_agx &&
       b->shader->info.fs.early_fragment_tests) {

      if (!ctx->masked_samples) {
         b->cursor = nir_before_impl(nir_shader_get_entrypoint(b->shader));

         ctx->masked_samples =
            nir_local_variable_create(b->impl, glsl_uint16_t_type(), NULL);

         nir_store_var(b, ctx->masked_samples, nir_imm_intN_t(b, 0xFF, 16),
                       nir_component_mask(1));
      }

      b->cursor = nir_before_instr(&intr->instr);

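      /* discard_agx takes the mask of samples to kill; invert it and
       * accumulate into the running mask of samples that stay live.
       */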
      nir_def *mask = nir_load_var(b, ctx->masked_samples);
      nir_def *mask_2 =
         nir_ixor(b, intr->src[0].ssa, nir_imm_intN_t(b, 0xff, 16));

      mask = nir_iand(b, mask, mask_2);
      nir_store_var(b, ctx->masked_samples, mask, nir_component_mask(1));

      nir_instr_remove(&intr->instr);
      return true;
   }

   if (intr->intrinsic != nir_intrinsic_store_output)
      return false;

   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);

   /* Fix up gl_FragColor */
   if (sem.location == FRAG_RESULT_COLOR) {
      sem.location = FRAG_RESULT_DATA0;
      info->broadcast_rt0 = true;
   }

   /* We don't use the epilog for sample mask writes */
   if (sem.location < FRAG_RESULT_DATA0)
      return false;

   /* Determine the ABI location. Dual source blending aliases a second
    * render target, so get that out of the way now.
    */
   unsigned loc = sem.location - FRAG_RESULT_DATA0;
   loc += nir_src_as_uint(intr->src[1]);

   if (sem.dual_source_blend_index) {
      assert(loc == 0);
      loc = 1;
   }

   b->cursor = nir_instr_remove(&intr->instr);
   nir_def *vec = intr->src[0].ssa;

   info->loc_written |= BITFIELD_BIT(loc);

   if (vec->bit_size == 32)
      info->size_32 |= BITFIELD_BIT(loc);
   else
      assert(vec->bit_size == 16);

   uint32_t one_f = (vec->bit_size == 32 ? fui(1.0) : _mesa_float_to_half(1.0));
   unsigned comp = nir_intrinsic_component(intr);

   u_foreach_bit(c, nir_intrinsic_write_mask(intr)) {
      nir_scalar s = nir_scalar_resolved(vec, c);
      if (loc == 0 && c == 3 && nir_scalar_is_const(s) &&
          nir_scalar_as_uint(s) == one_f) {

         info->loc0_w_1 = true;
      } else {
         unsigned stride = vec->bit_size / 16;

         nir_export_agx(b, nir_channel(b, vec, c),
                        .base = AGX_ABI_FOUT_COLOUR(loc) + (comp + c) * stride);
      }
   }

   return true;
}

bool
agx_nir_lower_fs_output_to_epilog(nir_shader *s,
                                  struct agx_fs_epilog_link_info *out)
{
   struct lower_epilog_ctx ctx = {.info = out};

   nir_shader_intrinsics_pass(s, lower_output_to_epilog,
                              nir_metadata_control_flow, &ctx);

   if (ctx.masked_samples) {
      nir_builder b =
         nir_builder_at(nir_after_impl(nir_shader_get_entrypoint(s)));

      nir_export_agx(&b, nir_load_var(&b, ctx.masked_samples),
                     .base = AGX_ABI_FOUT_WRITE_SAMPLES);
      out->sample_mask_after_force_early = true;

      bool progress;
      do {
         progress = false;
         NIR_PASS(progress, s, nir_lower_vars_to_ssa);
         NIR_PASS(progress, s, nir_opt_dce);
      } while (progress);
   }

   out->sample_shading = s->info.fs.uses_sample_shading;
   return true;
}

bool
agx_nir_lower_fs_active_samples_to_register(nir_shader *s)
{
   return nir_shader_intrinsics_pass(s, lower_active_samples_to_register,
                                     nir_metadata_control_flow, NULL);
}

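/*
 * Lower pipeline statistics for fragment shaders: each non-helper invocation
 * atomically adds its number of covered samples to the pixel shader
 * invocations counter.
 */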
static bool
agx_nir_lower_stats_fs(nir_shader *s)
{
   assert(s->info.stage == MESA_SHADER_FRAGMENT);
   nir_builder b_ =
      nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(s)));
   nir_builder *b = &b_;

   nir_push_if(b, nir_inot(b, nir_load_helper_invocation(b, 1)));
   nir_def *samples = nir_bit_count(b, nir_load_sample_mask_in(b));
   unsigned query = PIPE_STAT_QUERY_PS_INVOCATIONS;

   nir_def *addr = nir_load_stat_query_address_agx(b, .base = query);
   nir_global_atomic(b, 32, addr, samples, .atomic_op = nir_atomic_op_iadd);

   nir_pop_if(b, NULL);
   nir_metadata_preserve(b->impl, nir_metadata_control_flow);
   return true;
}

void
agx_nir_fs_prolog(nir_builder *b, const void *key_)
{
   const struct agx_fs_prolog_key *key = key_;
   b->shader->info.stage = MESA_SHADER_FRAGMENT;
   b->shader->info.name = "FS prolog";

   /* First, insert code for any emulated features */
   if (key->api_sample_mask != 0xff) {
      /* Kill samples that are NOT covered by the mask */
      nir_discard_agx(b, nir_imm_intN_t(b, key->api_sample_mask ^ 0xff, 16));
      b->shader->info.fs.uses_discard = true;
   }

   if (key->statistics) {
      NIR_PASS(_, b->shader, agx_nir_lower_stats_fs);
   }

   if (key->cull_distance_size) {
      NIR_PASS(_, b->shader, agx_nir_lower_cull_distance_fs,
               key->cull_distance_size);
   }

   if (key->polygon_stipple) {
      NIR_PASS_V(b->shader, agx_nir_lower_poly_stipple);
   }

   /* Then, lower the prolog */
   NIR_PASS(_, b->shader, agx_nir_lower_discard_zs_emit);
   NIR_PASS(_, b->shader, agx_nir_lower_sample_mask);
   NIR_PASS(_, b->shader, nir_shader_intrinsics_pass,
            lower_non_monolithic_uniforms, nir_metadata_control_flow, NULL);
   NIR_PASS(_, b->shader, lower_tests_zs, key->run_zs_tests);

   b->shader->info.io_lowered = true;
}