1 /*
2 * Copyright 2023 Alyssa Rosenzweig
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "compiler/glsl/list.h"
7 #include "compiler/nir/nir_builder.h"
8 #include "agx_compiler.h"
9 #include "nir.h"
10 #include "nir_builder_opcodes.h"
11 #include "nir_intrinsics.h"
12
13 /*
14 * sample_mask takes two bitmasks as arguments, TARGET and LIVE. Each bit refers
15 * to an indexed sample. Roughly, the instruction does:
16 *
17 * foreach sample in TARGET {
18 * if sample in LIVE {
19 * run depth/stencil/occlusion test/update
20 * } else {
21 * kill sample
22 * }
23 * }
24 *
25 * As a special case, TARGET may be set to all-1s (~0) to refer to all samples
26 * regardless of the framebuffer sample count.
27 *
28 * For example, to discard an entire pixel unconditionally, we could run:
29 *
30 * sample_mask ~0, 0
31 *
32 * sample_mask must follow these rules:
33 *
34 * 1. All sample_mask instructions affecting a sample must execute before a
35 * local_store_pixel instruction targeting that sample. This ensures that
36 * nothing is written for discarded samples (whether discarded in shader or
37 * due to a failed depth/stencil test).
38 *
39 * 2. If sample_mask is used anywhere in a shader, then on every execution path,
40 * every sample must be killed or else run depth/stencil tests exactly ONCE.
41 *
42 * 3. If a sample is killed, future sample_mask instructions have
43 * no effect on that sample. The following code sequence correctly implements
44 * a conditional discard (if there are no other sample_mask instructions in
45 * the shader):
46 *
47 * sample_mask discarded, 0
48 * sample_mask ~0, ~0
49 *
50 * but this sequence is incorrect:
51 *
52 * sample_mask ~0, ~discarded
53 * sample_mask ~0, ~0 <-- incorrect: depth/stencil tests run twice
54 *
55 * 4. zs_emit may be used in the shader exactly once to trigger tests.
56 * sample_mask with 0 may be used to discard early.
57 *
58 * This pass lowers discard_agx to sample_mask instructions satisfying these
59 * rules. Other passes should not generate sample_mask instructions, as there
60 * are too many footguns.
61 */
62
63 #define ALL_SAMPLES (0xFF)
64 #define BASE_Z 1
65 #define BASE_S 2
66
67 static bool
lower_discard_to_sample_mask_0(nir_builder * b,nir_intrinsic_instr * intr,UNUSED void * data)68 lower_discard_to_sample_mask_0(nir_builder *b, nir_intrinsic_instr *intr,
69 UNUSED void *data)
70 {
71 if (intr->intrinsic != nir_intrinsic_discard_agx)
72 return false;
73
74 b->cursor = nir_before_instr(&intr->instr);
75 nir_sample_mask_agx(b, intr->src[0].ssa, nir_imm_intN_t(b, 0, 16));
76 nir_instr_remove(&intr->instr);
77 return true;
78 }
79
80 static nir_intrinsic_instr *
last_discard_in_block(nir_block * block)81 last_discard_in_block(nir_block *block)
82 {
83 nir_foreach_instr_reverse(instr, block) {
84 if (instr->type != nir_instr_type_intrinsic)
85 continue;
86
87 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
88 if (intr->intrinsic == nir_intrinsic_discard_agx)
89 return intr;
90 }
91
92 return NULL;
93 }
94
95 static bool
cf_node_contains_discard(nir_cf_node * node)96 cf_node_contains_discard(nir_cf_node *node)
97 {
98 nir_foreach_block_in_cf_node(block, node) {
99 if (last_discard_in_block(block))
100 return true;
101 }
102
103 return false;
104 }
105
106 /*
107 * We want to run depth/stencil tests as early as possible, but we have to
108 * wait until after the last discard. We find the last discard and
109 * execute depth/stencil tests in the first unconditional block after (if
110 * in conditional control flow), or fuse depth/stencil tests into the
111 * sample instruction (if in unconditional control flow).
112 *
113 * To do so, we walk the root control flow list backwards, looking for the
114 * earliest unconditionally executed instruction after all discard.
115 */
static void
run_tests_after_last_discard(nir_builder *b)
{
   /* Reverse walk of the top-level CF list: the first discard-related node
    * we hit is, in program order, at-or-after every other discard.
    */
   foreach_list_typed_reverse(nir_cf_node, node, node, &b->impl->body) {
      if (node->type == nir_cf_node_block) {
         /* Unconditionally executed block */
         nir_block *block = nir_cf_node_as_block(node);
         nir_intrinsic_instr *intr = last_discard_in_block(block);

         if (intr) {
            /* Last discard is executed unconditionally, so fuse tests. */
            b->cursor = nir_before_instr(&intr->instr);

            /* sample_mask ~0, ~killed: kill the discarded samples and run
             * depth/stencil tests on the live ones in a single instruction,
             * so every sample is tested exactly once (rule 2 above).
             */
            nir_def *all_samples = nir_imm_intN_t(b, ALL_SAMPLES, 16);
            nir_def *killed = intr->src[0].ssa;
            nir_def *live = nir_ixor(b, killed, all_samples);

            nir_sample_mask_agx(b, all_samples, live);
            nir_instr_remove(&intr->instr);
            return;
         } else {
            /* Set cursor for insertion due to a preceding conditionally
             * executed discard. Since we iterate in reverse, this cursor is
             * still set when we later reach the conditional node below.
             */
            b->cursor = nir_before_block_after_phis(block);
         }
      } else if (cf_node_contains_discard(node)) {
         /* Conditionally executed block contains the last discard. Test
          * depth/stencil for remaining samples in unconditional code after.
          * Samples already killed are unaffected (rule 3), so this does not
          * double-test anything.
          */
         nir_sample_mask_agx(b, nir_imm_intN_t(b, ALL_SAMPLES, 16),
                             nir_imm_intN_t(b, ALL_SAMPLES, 16));
         return;
      }
   }
}
152
153 static void
run_tests_at_start(nir_shader * shader)154 run_tests_at_start(nir_shader *shader)
155 {
156 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
157 nir_builder b = nir_builder_at(nir_before_impl(impl));
158
159 nir_sample_mask_agx(&b, nir_imm_intN_t(&b, ALL_SAMPLES, 16),
160 nir_imm_intN_t(&b, ALL_SAMPLES, 16));
161 }
162
163 bool
agx_nir_lower_sample_mask(nir_shader * shader)164 agx_nir_lower_sample_mask(nir_shader *shader)
165 {
166 bool writes_zs =
167 shader->info.outputs_written &
168 (BITFIELD64_BIT(FRAG_RESULT_STENCIL) | BITFIELD64_BIT(FRAG_RESULT_DEPTH));
169
170 if (shader->info.fs.early_fragment_tests) {
171 /* run tests early, if we need testing */
172 if (shader->info.fs.uses_discard || writes_zs ||
173 shader->info.writes_memory) {
174
175 run_tests_at_start(shader);
176 }
177 } else if (shader->info.fs.uses_discard) {
178 /* If we have zs_emit, the tests will be triggered by zs_emit, otherwise
179 * we need to trigger tests explicitly. Allow sample_mask with zs_emit.
180 */
181 if (!writes_zs) {
182 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
183 nir_builder b = nir_builder_create(impl);
184
185 /* run tests late */
186 run_tests_after_last_discard(&b);
187 }
188 } else {
189 /* regular shaders that don't use discard have nothing to lower */
190 return false;
191 }
192
193 nir_shader_intrinsics_pass(shader, lower_discard_to_sample_mask_0,
194 nir_metadata_block_index | nir_metadata_dominance,
195 NULL);
196
197 return true;
198 }
199