1 /*
2 * Copyright 2023 Valve Corporation
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "compiler/nir/nir.h"
7 #include "compiler/nir/nir_builder.h"
8 #include "util/macros.h"
9 #include "agx_compile.h"
10 #include "agx_nir.h"
11 #include "glsl_types.h"
12 #include "shader_enums.h"
13
14 /*
15 * Lower cull distance to discard. From the spec:
16 *
17 * If the cull distance for any enabled cull half-space is negative for all
18 * of the vertices of the primitive under consideration, the primitive is
19 * discarded.
20 *
21 * We don't have a direct way to read the cull distance at non-provoking
22 * vertices in the fragment shader. Instead, we interpolate the quantity:
23 *
24 * cull distance >= 0.0 ? 1.0 : 0.0
25 *
26 * Then, the discard condition is equivalent to:
27 *
28 * "quantity is zero for all vertices of the primitive"
29 *
30 * which by linearity is equivalent to:
31 *
32 * quantity is zero somewhere in the primitive and quantity has zero
33 * first-order screen space derivatives.
34 *
35 * which we can determine with ease in the fragment shader.
36 */
37
38 static bool
lower_write(nir_builder * b,nir_intrinsic_instr * intr,UNUSED void * data)39 lower_write(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
40 {
41 if (intr->intrinsic != nir_intrinsic_store_output)
42 return false;
43
44 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
45 if (sem.location != VARYING_SLOT_CULL_DIST0)
46 return false;
47
48 nir_instr *clone = nir_instr_clone(b->shader, &intr->instr);
49 nir_intrinsic_instr *lowered = nir_instr_as_intrinsic(clone);
50
51 b->cursor = nir_after_instr(&intr->instr);
52 nir_def *v = nir_b2f32(b, nir_fge_imm(b, intr->src[0].ssa, 0.0));
53
54 nir_builder_instr_insert(b, clone);
55 nir_src_rewrite(&lowered->src[0], v);
56
57 sem.location = VARYING_SLOT_CULL_PRIMITIVE;
58 nir_intrinsic_set_io_semantics(lowered, sem);
59 return true;
60 }
61
62 bool
agx_nir_lower_cull_distance_vs(nir_shader * s)63 agx_nir_lower_cull_distance_vs(nir_shader *s)
64 {
65 assert(s->info.stage == MESA_SHADER_VERTEX ||
66 s->info.stage == MESA_SHADER_TESS_EVAL);
67
68 assert(s->info.outputs_written & VARYING_BIT_CULL_DIST0);
69
70 nir_shader_intrinsics_pass(
71 s, lower_write, nir_metadata_block_index | nir_metadata_dominance, NULL);
72
73 s->info.outputs_written |=
74 BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE,
75 DIV_ROUND_UP(s->info.cull_distance_array_size, 4));
76 return true;
77 }
78
79 bool
agx_nir_lower_cull_distance_fs(nir_shader * s,unsigned nr_distances)80 agx_nir_lower_cull_distance_fs(nir_shader *s, unsigned nr_distances)
81 {
82 assert(s->info.stage == MESA_SHADER_FRAGMENT);
83 assert(nr_distances > 0);
84
85 nir_builder b_ =
86 nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(s)));
87 nir_builder *b = &b_;
88
89 /* Test each half-space */
90 nir_def *culled = nir_imm_false(b);
91
92 for (unsigned i = 0; i < nr_distances; ++i) {
93 /* Load the coefficient vector for this half-space. Imaginapple
94 * partial derivatives and the value somewhere.
95 */
96 nir_def *cf = nir_load_coefficients_agx(
97 b, .component = i & 3,
98 .io_semantics.location = VARYING_SLOT_CULL_PRIMITIVE + (i / 4),
99 .io_semantics.num_slots = nr_distances / 4,
100 .interp_mode = INTERP_MODE_NOPERSPECTIVE);
101
102 /* If the coefficients are identically zero, then the quantity is
103 * zero across the primtive <==> cull distance is negative across the
104 * primitive <==> the primitive is culled.
105 */
106 culled = nir_ior(b, culled, nir_ball(b, nir_feq_imm(b, cf, 0)));
107 }
108
109 /* Emulate primitive culling by discarding fragments */
110 nir_discard_if(b, culled);
111
112 s->info.inputs_read |= BITFIELD64_RANGE(VARYING_SLOT_CULL_PRIMITIVE,
113 DIV_ROUND_UP(nr_distances, 4));
114
115 s->info.fs.uses_discard = true;
116 nir_metadata_preserve(b->impl,
117 nir_metadata_dominance | nir_metadata_block_index);
118 return true;
119 }
120