• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright 2023 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */
5 
6 #include "compiler/glsl/list.h"
7 #include "compiler/nir/nir_builder.h"
8 #include "agx_compiler.h"
9 #include "nir.h"
10 #include "nir_builder_opcodes.h"
11 #include "nir_intrinsics.h"
12 
/*
 * sample_mask takes two bitmasks as arguments, TARGET and LIVE. Each bit refers
 * to an indexed sample. Roughly, the instruction does:
 *
 *    foreach sample in TARGET {
 *       if sample in LIVE {
 *          run depth/stencil/occlusion test/update
 *       } else {
 *          kill sample
 *       }
 *    }
 *
 * As a special case, TARGET may be set to all-1s (~0) to refer to all samples
 * regardless of the framebuffer sample count.
 *
 * For example, to discard an entire pixel unconditionally, we could run:
 *
 *    sample_mask ~0, 0
 *
 * sample_mask must follow these rules:
 *
 * 1. All sample_mask instructions affecting a sample must execute before a
 *    local_store_pixel instruction targeting that sample. This ensures that
 *    nothing is written for discarded samples (whether discarded in shader or
 *    due to a failed depth/stencil test).
 *
 * 2. If sample_mask is used anywhere in a shader, then on every execution path,
 *    every sample must be killed or else run depth/stencil tests exactly ONCE.
 *
 * 3. If a sample is killed, future sample_mask instructions have
 *    no effect on that sample. The following code sequence correctly implements
 *    a conditional discard (if there are no other sample_mask instructions in
 *    the shader):
 *
 *       sample_mask discarded, 0
 *       sample_mask ~0, ~0
 *
 *    but this sequence is incorrect:
 *
 *       sample_mask ~0, ~discarded
 *       sample_mask ~0, ~0         <-- incorrect: depth/stencil tests run twice
 *
 * 4. zs_emit may be used in the shader exactly once to trigger tests.
 *    sample_mask with 0 may be used to discard early.
 *
 * This pass lowers discard_agx to sample_mask instructions satisfying these
 * rules. Other passes should not generate sample_mask instructions, as there
 * are too many footguns.
 */
62 
/* TARGET/LIVE mask bits covering every sample the hardware supports (8). */
#define ALL_SAMPLES (0xFF)
/* NOTE(review): unused in this chunk — presumably the zs_emit source indices
 * for depth and stencil; confirm against the zs_emit lowering.
 */
#define BASE_Z      1
#define BASE_S      2
66 
67 static bool
lower_discard_to_sample_mask_0(nir_builder * b,nir_intrinsic_instr * intr,UNUSED void * data)68 lower_discard_to_sample_mask_0(nir_builder *b, nir_intrinsic_instr *intr,
69                                UNUSED void *data)
70 {
71    if (intr->intrinsic != nir_intrinsic_discard_agx)
72       return false;
73 
74    b->cursor = nir_before_instr(&intr->instr);
75    nir_sample_mask_agx(b, intr->src[0].ssa, nir_imm_intN_t(b, 0, 16));
76    nir_instr_remove(&intr->instr);
77    return true;
78 }
79 
80 static nir_intrinsic_instr *
last_discard_in_block(nir_block * block)81 last_discard_in_block(nir_block *block)
82 {
83    nir_foreach_instr_reverse(instr, block) {
84       if (instr->type != nir_instr_type_intrinsic)
85          continue;
86 
87       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
88       if (intr->intrinsic == nir_intrinsic_discard_agx)
89          return intr;
90    }
91 
92    return NULL;
93 }
94 
95 static bool
cf_node_contains_discard(nir_cf_node * node)96 cf_node_contains_discard(nir_cf_node *node)
97 {
98    nir_foreach_block_in_cf_node(block, node) {
99       if (last_discard_in_block(block))
100          return true;
101    }
102 
103    return false;
104 }
105 
106 /*
107  * We want to run depth/stencil tests as early as possible, but we have to
108  * wait until after the last discard. We find the last discard and
109  * execute depth/stencil tests in the first unconditional block after (if
110  * in conditional control flow), or fuse depth/stencil tests into the
111  * sample instruction (if in unconditional control flow).
112  *
113  * To do so, we walk the root control flow list backwards, looking for the
114  * earliest unconditionally executed instruction after all discard.
115  */
116 static void
run_tests_after_last_discard(nir_builder * b)117 run_tests_after_last_discard(nir_builder *b)
118 {
119    foreach_list_typed_reverse(nir_cf_node, node, node, &b->impl->body) {
120       if (node->type == nir_cf_node_block) {
121          /* Unconditionally executed block */
122          nir_block *block = nir_cf_node_as_block(node);
123          nir_intrinsic_instr *intr = last_discard_in_block(block);
124 
125          if (intr) {
126             /* Last discard is executed unconditionally, so fuse tests. */
127             b->cursor = nir_before_instr(&intr->instr);
128 
129             nir_def *all_samples = nir_imm_intN_t(b, ALL_SAMPLES, 16);
130             nir_def *killed = intr->src[0].ssa;
131             nir_def *live = nir_ixor(b, killed, all_samples);
132 
133             nir_sample_mask_agx(b, all_samples, live);
134             nir_instr_remove(&intr->instr);
135             return;
136          } else {
137             /* Set cursor for insertion due to a preceding conditionally
138              * executed discard.
139              */
140             b->cursor = nir_before_block_after_phis(block);
141          }
142       } else if (cf_node_contains_discard(node)) {
143          /* Conditionally executed block contains the last discard. Test
144           * depth/stencil for remaining samples in unconditional code after.
145           */
146          nir_sample_mask_agx(b, nir_imm_intN_t(b, ALL_SAMPLES, 16),
147                              nir_imm_intN_t(b, ALL_SAMPLES, 16));
148          return;
149       }
150    }
151 }
152 
153 static void
run_tests_at_start(nir_shader * shader)154 run_tests_at_start(nir_shader *shader)
155 {
156    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
157    nir_builder b = nir_builder_at(nir_before_impl(impl));
158 
159    nir_sample_mask_agx(&b, nir_imm_intN_t(&b, ALL_SAMPLES, 16),
160                        nir_imm_intN_t(&b, ALL_SAMPLES, 16));
161 }
162 
163 bool
agx_nir_lower_sample_mask(nir_shader * shader)164 agx_nir_lower_sample_mask(nir_shader *shader)
165 {
166    bool writes_zs =
167       shader->info.outputs_written &
168       (BITFIELD64_BIT(FRAG_RESULT_STENCIL) | BITFIELD64_BIT(FRAG_RESULT_DEPTH));
169 
170    if (shader->info.fs.early_fragment_tests) {
171       /* run tests early, if we need testing */
172       if (shader->info.fs.uses_discard || writes_zs ||
173           shader->info.writes_memory) {
174 
175          run_tests_at_start(shader);
176       }
177    } else if (shader->info.fs.uses_discard) {
178       /* If we have zs_emit, the tests will be triggered by zs_emit, otherwise
179        * we need to trigger tests explicitly. Allow sample_mask with zs_emit.
180        */
181       if (!writes_zs) {
182          nir_function_impl *impl = nir_shader_get_entrypoint(shader);
183          nir_builder b = nir_builder_create(impl);
184 
185          /* run tests late */
186          run_tests_after_last_discard(&b);
187       }
188    } else {
189       /* regular shaders that don't use discard have nothing to lower */
190       return false;
191    }
192 
193    nir_shader_intrinsics_pass(shader, lower_discard_to_sample_mask_0,
194                               nir_metadata_block_index | nir_metadata_dominance,
195                               NULL);
196 
197    return true;
198 }
199