/*
 * Copyright © 2024 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */
6
7 #include "nir.h"
8 #include <math.h>
9
10 static unsigned
get_tess_level_component(nir_intrinsic_instr * intr)11 get_tess_level_component(nir_intrinsic_instr *intr)
12 {
13 unsigned location = nir_intrinsic_io_semantics(intr).location;
14
15 return (location == VARYING_SLOT_TESS_LEVEL_INNER ? 4 : 0) +
16 nir_intrinsic_component(intr);
17 }
18
19 static unsigned
get_inst_tesslevel_writemask(nir_intrinsic_instr * intr)20 get_inst_tesslevel_writemask(nir_intrinsic_instr *intr)
21 {
22 if (intr->intrinsic != nir_intrinsic_store_output)
23 return 0;
24
25 unsigned location = nir_intrinsic_io_semantics(intr).location;
26 if (location != VARYING_SLOT_TESS_LEVEL_OUTER &&
27 location != VARYING_SLOT_TESS_LEVEL_INNER)
28 return 0;
29
30 return nir_intrinsic_write_mask(intr) << get_tess_level_component(intr);
31 }
32
33 static bool
is_tcs_output_barrier(nir_intrinsic_instr * intr)34 is_tcs_output_barrier(nir_intrinsic_instr *intr)
35 {
36 return intr->intrinsic == nir_intrinsic_barrier &&
37 nir_intrinsic_memory_modes(intr) & nir_var_shader_out &&
38 nir_intrinsic_memory_scope(intr) >= SCOPE_WORKGROUP &&
39 nir_intrinsic_execution_scope(intr) >= SCOPE_WORKGROUP;
40 }
41
42 static void
scan_tess_levels(struct exec_list * cf_list,unsigned * upper_block_tl_writemask,unsigned * cond_block_tl_writemask,bool * all_invocs_define_tess_levels,bool is_nested_cf)43 scan_tess_levels(struct exec_list *cf_list, unsigned *upper_block_tl_writemask,
44 unsigned *cond_block_tl_writemask,
45 bool *all_invocs_define_tess_levels, bool is_nested_cf)
46 {
47 foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
48 switch (cf_node->type) {
49 case nir_cf_node_block: {
50 nir_block *block = nir_cf_node_as_block(cf_node);
51 nir_foreach_instr (instr, block) {
52 if (instr->type != nir_instr_type_intrinsic)
53 continue;
54
55 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
56
57 if (!is_tcs_output_barrier(intrin)) {
58 *upper_block_tl_writemask |= get_inst_tesslevel_writemask(intrin);
59 continue;
60 }
61
62 /* This is a barrier. If it's in nested control flow, put this
63 * in the too hard basket. In GLSL this is not possible but it is
64 * in SPIR-V.
65 */
66 if (is_nested_cf) {
67 *all_invocs_define_tess_levels = false;
68 return;
69 }
70
71 /* The following case must be prevented:
72 * gl_TessLevelInner = ...;
73 * barrier();
74 * if (gl_InvocationID == 1)
75 * gl_TessLevelInner = ...;
76 *
77 * If you consider disjoint code segments separated by barriers,
78 * each such segment that writes tess level channels should write
79 * the same channels in all codepaths within that segment.
80 */
81 if (*upper_block_tl_writemask || *cond_block_tl_writemask) {
82 /* Accumulate the result: */
83 *all_invocs_define_tess_levels &=
84 !(*cond_block_tl_writemask & ~(*upper_block_tl_writemask));
85
86 /* Analyze the next code segment from scratch. */
87 *upper_block_tl_writemask = 0;
88 *cond_block_tl_writemask = 0;
89 }
90 }
91 break;
92 }
93 case nir_cf_node_if: {
94 unsigned then_tesslevel_writemask = 0;
95 unsigned else_tesslevel_writemask = 0;
96 nir_if *if_stmt = nir_cf_node_as_if(cf_node);
97
98 scan_tess_levels(&if_stmt->then_list, &then_tesslevel_writemask,
99 cond_block_tl_writemask,
100 all_invocs_define_tess_levels, true);
101
102 scan_tess_levels(&if_stmt->else_list, &else_tesslevel_writemask,
103 cond_block_tl_writemask,
104 all_invocs_define_tess_levels, true);
105
106 if (then_tesslevel_writemask || else_tesslevel_writemask) {
107 /* If both statements write the same tess level channels,
108 * we can say that the upper block writes them too.
109 */
110 *upper_block_tl_writemask |= then_tesslevel_writemask &
111 else_tesslevel_writemask;
112 *cond_block_tl_writemask |= then_tesslevel_writemask |
113 else_tesslevel_writemask;
114 }
115 break;
116 }
117 case nir_cf_node_loop: {
118 nir_loop *loop = nir_cf_node_as_loop(cf_node);
119 assert(!nir_loop_has_continue_construct(loop));
120
121 scan_tess_levels(&loop->body, cond_block_tl_writemask,
122 cond_block_tl_writemask,
123 all_invocs_define_tess_levels, true);
124 break;
125 }
126 default:
127 unreachable("unknown cf node type");
128 }
129 }
130 }
131
132 static bool
all_invocations_define_tess_levels(const struct nir_shader * nir)133 all_invocations_define_tess_levels(const struct nir_shader *nir)
134 {
135 assert(nir->info.stage == MESA_SHADER_TESS_CTRL);
136
137 /* The pass works as follows:
138 *
139 * If all codepaths write tess levels, we can say that all invocations
140 * define tess level values. Whether a tess level value is defined is
141 * determined for each component separately.
142 */
143 unsigned main_block_tl_writemask = 0; /* if main block writes tess levels */
144 unsigned cond_block_tl_writemask = 0; /* if cond block writes tess levels */
145
146 /* Initial value = true. Here the pass will accumulate results from
147 * multiple segments surrounded by barriers. If tess levels aren't
148 * written at all, it's a shader bug and we don't care if this will be
149 * true.
150 */
151 bool result = true;
152
153 nir_foreach_function_impl(impl, nir) {
154 scan_tess_levels(&impl->body, &main_block_tl_writemask,
155 &cond_block_tl_writemask,
156 &result, false);
157 }
158
159 /* Accumulate the result for the last code segment separated by a
160 * barrier.
161 */
162 if (main_block_tl_writemask || cond_block_tl_writemask)
163 result &= !(cond_block_tl_writemask & ~main_block_tl_writemask);
164
165 return result;
166 }
167
168 /* It's OK to pass UNSPECIFIED to prim and spacing. */
169 void
nir_gather_tcs_info(const nir_shader * nir,nir_tcs_info * info,enum tess_primitive_mode prim,enum gl_tess_spacing spacing)170 nir_gather_tcs_info(const nir_shader *nir, nir_tcs_info *info,
171 enum tess_primitive_mode prim,
172 enum gl_tess_spacing spacing)
173 {
174 memset(info, 0, sizeof(*info));
175 info->all_invocations_define_tess_levels =
176 all_invocations_define_tess_levels(nir);
177
178 unsigned tess_level_writes_le_zero = 0;
179 unsigned tess_level_writes_le_one = 0;
180 unsigned tess_level_writes_le_two = 0;
181 unsigned tess_level_writes_other = 0;
182
183 /* Gather barriers and which values are written to tess level outputs. */
184 nir_foreach_function_impl(impl, nir) {
185 nir_foreach_block(block, impl) {
186 nir_foreach_instr(instr, block) {
187 if (instr->type != nir_instr_type_intrinsic)
188 continue;
189
190 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
191
192 if (is_tcs_output_barrier(intr)) {
193 /* Only gather barriers outside control flow. */
194 if (block->cf_node.parent->type == nir_cf_node_function)
195 info->always_executes_barrier = true;
196 continue;
197 }
198
199 if (intr->intrinsic != nir_intrinsic_store_output)
200 continue;
201
202 unsigned location = nir_intrinsic_io_semantics(intr).location;
203 if (location != VARYING_SLOT_TESS_LEVEL_OUTER &&
204 location != VARYING_SLOT_TESS_LEVEL_INNER)
205 continue;
206
207 unsigned base_shift = get_tess_level_component(intr);
208 unsigned writemask = nir_intrinsic_write_mask(intr);
209
210 u_foreach_bit(i, writemask) {
211 nir_scalar scalar = nir_scalar_resolved(intr->src[0].ssa, i);
212 unsigned shift = base_shift + i;
213
214 if (nir_scalar_is_const(scalar)) {
215 float f = nir_scalar_as_float(scalar);
216
217 if (f <= 0 || isnan(f))
218 tess_level_writes_le_zero |= BITFIELD_BIT(shift);
219 else if (f <= 1)
220 tess_level_writes_le_one |= BITFIELD_BIT(shift);
221 else if (f <= 2)
222 tess_level_writes_le_two |= BITFIELD_BIT(shift);
223 else
224 tess_level_writes_other |= BITFIELD_BIT(shift);
225 } else {
226 /* TODO: This could use range analysis. */
227 tess_level_writes_other |= BITFIELD_BIT(shift);
228 }
229 }
230 }
231 }
232 }
233
234 /* Determine which outer tess level components can discard patches.
235 * If the primitive type is unspecified, we have to assume the worst case.
236 */
237 unsigned min_outer, min_inner, max_outer, max_inner;
238 mesa_count_tess_level_components(prim == TESS_PRIMITIVE_UNSPECIFIED ?
239 TESS_PRIMITIVE_ISOLINES : prim,
240 &min_outer, &min_inner);
241 mesa_count_tess_level_components(prim, &max_outer, &max_inner);
242 const unsigned min_valid_outer_comp_mask = BITFIELD_RANGE(0, min_outer);
243 const unsigned max_valid_outer_comp_mask = BITFIELD_RANGE(0, max_outer);
244 const unsigned max_valid_inner_comp_mask = BITFIELD_RANGE(4, max_inner);
245
246 /* All tessellation levels are effectively 0 if the patch has at least one
247 * outer tess level component either in the [-inf, 0] range or equal to NaN,
248 * causing it to be discarded. Inner tess levels have no effect.
249 */
250 info->all_tess_levels_are_effectively_zero =
251 tess_level_writes_le_zero & ~tess_level_writes_le_one &
252 ~tess_level_writes_le_two & ~tess_level_writes_other &
253 min_valid_outer_comp_mask;
254
255 const unsigned tess_level_writes_any =
256 tess_level_writes_le_zero | tess_level_writes_le_one |
257 tess_level_writes_le_two | tess_level_writes_other;
258
259 const bool outer_is_gt_zero_le_one =
260 (tess_level_writes_le_one & ~tess_level_writes_le_zero &
261 ~tess_level_writes_le_two & ~tess_level_writes_other &
262 max_valid_outer_comp_mask) ==
263 (tess_level_writes_any & max_valid_outer_comp_mask);
264
265 /* Whether the inner tess levels are in the [-inf, 1] range. */
266 const bool inner_is_le_one =
267 ((tess_level_writes_le_zero | tess_level_writes_le_one) &
268 ~tess_level_writes_le_two & ~tess_level_writes_other &
269 max_valid_inner_comp_mask) ==
270 (tess_level_writes_any & max_valid_inner_comp_mask);
271
272 /* If the patch has tess level values set to 1 or equivalent numbers, it's
273 * not discarded, but different things happen depending on the spacing.
274 */
275 switch (spacing) {
276 case TESS_SPACING_EQUAL:
277 case TESS_SPACING_FRACTIONAL_ODD:
278 case TESS_SPACING_UNSPECIFIED:
279 /* The tessellator clamps all tess levels greater than 0 to 1.
280 * If all outer and inner tess levels are in the (0, 1] range, which is
281 * effectively 1, untessellated patches are drawn.
282 */
283 info->all_tess_levels_are_effectively_one = outer_is_gt_zero_le_one &&
284 inner_is_le_one;
285 break;
286
287 case TESS_SPACING_FRACTIONAL_EVEN: {
288 /* The tessellator clamps all tess levels to 2 (both outer and inner)
289 * except outer tess level component 0 of isolines, which is clamped
290 * to 1. If all outer tess levels are in the (0, 2] or (0, 1] range
291 * (for outer[0] of isolines) and all inner tess levels are
292 * in the [-inf, 2] range, it's the same as writing 1 to all tess
293 * levels.
294 */
295 bool isolines_are_eff_one =
296 /* The (0, 1] range of outer[0]. */
297 (tess_level_writes_le_one & ~tess_level_writes_le_zero &
298 ~tess_level_writes_le_two & ~tess_level_writes_other & 0x1) ==
299 (tess_level_writes_any & 0x1) &&
300 /* The (0, 2] range of outer[1]. */
301 ((tess_level_writes_le_one | tess_level_writes_le_two) &
302 ~tess_level_writes_le_zero & ~tess_level_writes_other & 0x2) ==
303 (tess_level_writes_any & 0x2);
304
305 bool triquads_are_eff_one =
306 /* The (0, 2] outer range. */
307 ((tess_level_writes_le_one | tess_level_writes_le_two) &
308 ~tess_level_writes_le_zero & ~tess_level_writes_other &
309 max_valid_outer_comp_mask) ==
310 (tess_level_writes_any & max_valid_outer_comp_mask) &&
311 /* The [-inf, 2] inner range. */
312 ((tess_level_writes_le_zero | tess_level_writes_le_one |
313 tess_level_writes_le_two) & ~tess_level_writes_other &
314 max_valid_inner_comp_mask) ==
315 (tess_level_writes_any & max_valid_inner_comp_mask);
316
317 if (prim == TESS_PRIMITIVE_UNSPECIFIED) {
318 info->all_tess_levels_are_effectively_one = isolines_are_eff_one &&
319 triquads_are_eff_one;
320 } else if (prim == TESS_PRIMITIVE_ISOLINES) {
321 info->all_tess_levels_are_effectively_one = isolines_are_eff_one;
322 } else {
323 info->all_tess_levels_are_effectively_one = triquads_are_eff_one;
324 }
325 break;
326 }
327 }
328
329 assert(!info->all_tess_levels_are_effectively_zero ||
330 !info->all_tess_levels_are_effectively_one);
331
332 info->discards_patches =
333 (tess_level_writes_le_zero & min_valid_outer_comp_mask) != 0;
334 }
335