• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_control_flow.h"
27 
28 #include "dxil_nir.h"
29 
30 static void
remove_hs_intrinsics(nir_function_impl * impl)31 remove_hs_intrinsics(nir_function_impl *impl)
32 {
33    nir_foreach_block(block, impl) {
34       nir_foreach_instr_safe(instr, block) {
35          if (instr->type != nir_instr_type_intrinsic)
36             continue;
37          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
38          if (intr->intrinsic != nir_intrinsic_store_output &&
39              intr->intrinsic != nir_intrinsic_memory_barrier_tcs_patch &&
40              intr->intrinsic != nir_intrinsic_control_barrier)
41             continue;
42          nir_instr_remove(instr);
43       }
44    }
45    nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance);
46 }
47 
48 static void
49 add_instr_and_srcs_to_set(struct set *instr_set, nir_instr *instr);
50 
51 static bool
add_srcs_to_set(nir_src * src,void * state)52 add_srcs_to_set(nir_src *src, void *state)
53 {
54    assert(src->is_ssa);
55    add_instr_and_srcs_to_set(state, src->ssa->parent_instr);
56    return true;
57 }
58 
59 static void
add_instr_and_srcs_to_set(struct set * instr_set,nir_instr * instr)60 add_instr_and_srcs_to_set(struct set *instr_set, nir_instr *instr)
61 {
62    bool was_already_found = false;
63    _mesa_set_search_or_add(instr_set, instr, &was_already_found);
64    if (!was_already_found)
65       nir_foreach_src(instr, add_srcs_to_set, instr_set);
66 }
67 
68 static void
prune_patch_function_to_intrinsic_and_srcs(nir_function_impl * impl)69 prune_patch_function_to_intrinsic_and_srcs(nir_function_impl *impl)
70 {
71    struct set *instr_set = _mesa_pointer_set_create(NULL);
72 
73    /* Do this in two phases:
74     * 1. Find all instructions that contribute to a store_output and add them to
75     *    the set. Also, add instructions that contribute to control flow.
76     * 2. Erase every instruction that isn't in the set
77     */
78    nir_foreach_block(block, impl) {
79       nir_if *following_if = nir_block_get_following_if(block);
80       if (following_if) {
81          assert(following_if->condition.is_ssa);
82          add_instr_and_srcs_to_set(instr_set, following_if->condition.ssa->parent_instr);
83       }
84       nir_foreach_instr_safe(instr, block) {
85          if (instr->type == nir_instr_type_intrinsic) {
86             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
87             if (intr->intrinsic != nir_intrinsic_store_output &&
88                 intr->intrinsic != nir_intrinsic_memory_barrier_tcs_patch)
89                continue;
90          } else if (instr->type != nir_instr_type_jump)
91             continue;
92          add_instr_and_srcs_to_set(instr_set, instr);
93       }
94    }
95 
96    nir_foreach_block_reverse(block, impl) {
97       nir_foreach_instr_reverse_safe(instr, block) {
98          struct set_entry *entry = _mesa_set_search(instr_set, instr);
99          if (!entry)
100             nir_instr_remove(instr);
101       }
102    }
103 
104    _mesa_set_destroy(instr_set, NULL);
105 }
106 
107 static nir_cursor
get_cursor_for_instr_without_cf(nir_instr * instr)108 get_cursor_for_instr_without_cf(nir_instr *instr)
109 {
110    nir_block *block = instr->block;
111    if (block->cf_node.parent->type == nir_cf_node_function)
112       return nir_before_instr(instr);
113 
114    do {
115       block = nir_cf_node_as_block(nir_cf_node_prev(block->cf_node.parent));
116    } while (block->cf_node.parent->type != nir_cf_node_function);
117    return nir_after_block_before_jump(block);
118 }
119 
120 struct tcs_patch_loop_state {
121    nir_ssa_def *deref, *count;
122    nir_cursor begin_cursor, end_cursor, insert_cursor;
123    nir_loop *loop;
124 };
125 
126 static void
start_tcs_loop(nir_builder * b,struct tcs_patch_loop_state * state,nir_deref_instr * loop_var_deref)127 start_tcs_loop(nir_builder *b, struct tcs_patch_loop_state *state, nir_deref_instr *loop_var_deref)
128 {
129    if (!loop_var_deref)
130       return;
131 
132    nir_store_deref(b, loop_var_deref, nir_imm_int(b, 0), 1);
133    state->loop = nir_push_loop(b);
134    state->count = nir_load_deref(b, loop_var_deref);
135    nir_push_if(b, nir_ige(b, state->count, nir_imm_int(b, b->impl->function->shader->info.tess.tcs_vertices_out)));
136    nir_jump(b, nir_jump_break);
137    nir_pop_if(b, NULL);
138    state->insert_cursor = b->cursor;
139    nir_store_deref(b, loop_var_deref, nir_iadd_imm(b, state->count, 1), 1);
140    nir_pop_loop(b, state->loop);
141 }
142 
143 static void
end_tcs_loop(nir_builder * b,struct tcs_patch_loop_state * state)144 end_tcs_loop(nir_builder *b, struct tcs_patch_loop_state *state)
145 {
146    if (!state->loop)
147       return;
148 
149    nir_cf_list extracted;
150    nir_cf_extract(&extracted, state->begin_cursor, state->end_cursor);
151    nir_cf_reinsert(&extracted, state->insert_cursor);
152 
153    *state = (struct tcs_patch_loop_state ){ 0 };
154 }
155 
156 /* In HLSL/DXIL, the hull (tesselation control) shader is split into two:
157  * 1. The main hull shader, which runs once per output control point.
158  * 2. A patch constant function, which runs once overall.
159  * In GLSL/NIR, these are combined. Each invocation must write to the output
160  * array with a constant gl_InvocationID, which is (apparently) lowered to an
161  * if/else ladder in nir. Each invocation must write the same value to patch
162  * constants - or else undefined behavior strikes. NIR uses store_output to
163  * write the patch constants, and store_per_vertex_output to write the control
164  * point values.
165  *
166  * We clone the NIR function to produce 2: one with the store_output intrinsics
167  * removed, which becomes the main shader (only writes control points), and one
168  * with everything that doesn't contribute to store_output removed, which becomes
169  * the patch constant function.
170  *
171  * For the patch constant function, if the expressions rely on gl_InvocationID,
172  * then we need to run the resulting logic in a loop, using the loop counter to
173  * replace gl_InvocationID. This loop can be terminated when a barrier is hit. If
174  * gl_InvocationID is used again after the barrier, then another loop needs to begin.
175  */
176 void
dxil_nir_split_tess_ctrl(nir_shader * nir,nir_function ** patch_const_func)177 dxil_nir_split_tess_ctrl(nir_shader *nir, nir_function **patch_const_func)
178 {
179    assert(nir->info.stage == MESA_SHADER_TESS_CTRL);
180    assert(exec_list_length(&nir->functions) == 1);
181    nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir);
182 
183    *patch_const_func = nir_function_create(nir, "PatchConstantFunc");
184    nir_function_impl *patch_const_func_impl = nir_function_impl_clone(nir, entrypoint);
185    (*patch_const_func)->impl = patch_const_func_impl;
186    patch_const_func_impl->function = *patch_const_func;
187 
188    remove_hs_intrinsics(entrypoint);
189    prune_patch_function_to_intrinsic_and_srcs(patch_const_func_impl);
190 
191    /* Kill dead references to the invocation ID from the patch const func so we don't
192     * insert unnecessarily loops
193     */
194    bool progress;
195    do {
196       progress = false;
197       progress |= nir_opt_dead_cf(nir);
198       progress |= nir_opt_dce(nir);
199    } while (progress);
200 
201    /* Now, the patch constant function needs to be split into blocks and loops.
202     * The series of instructions up to the first block containing a load_invocation_id
203     * will run sequentially. Then a loop is inserted so load_invocation_id will load the
204     * loop counter. This loop continues until a barrier is reached, when the loop
205     * is closed and the process begins again.
206     *
207     * First, sink load_invocation_id so that it's present on both sides of barriers.
208     * Each use gets a unique load of the invocation ID.
209     */
210    nir_builder b;
211    nir_builder_init(&b, patch_const_func_impl);
212    nir_foreach_block(block, patch_const_func_impl) {
213       nir_foreach_instr_safe(instr, block) {
214          if (instr->type != nir_instr_type_intrinsic)
215             continue;
216          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
217          if (intr->intrinsic != nir_intrinsic_load_invocation_id ||
218              list_length(&intr->dest.ssa.uses) +
219              list_length(&intr->dest.ssa.if_uses) <= 1)
220             continue;
221          nir_foreach_use_safe(src, &intr->dest.ssa) {
222             b.cursor = nir_before_src(src, false);
223             nir_instr_rewrite_src_ssa(src->parent_instr, src, nir_load_invocation_id(&b));
224          }
225          nir_foreach_if_use_safe(src, &intr->dest.ssa) {
226             b.cursor = nir_before_src(src, true);
227             nir_if_rewrite_condition_ssa(src->parent_if, src, nir_load_invocation_id(&b));
228          }
229          nir_instr_remove(instr);
230       }
231    }
232 
233    /* Now replace those invocation ID loads with loads of a local variable that's used as a loop counter */
234    nir_variable *loop_var = NULL;
235    nir_deref_instr *loop_var_deref = NULL;
236    struct tcs_patch_loop_state state = { 0 };
237    nir_foreach_block_safe(block, patch_const_func_impl) {
238       nir_foreach_instr_safe(instr, block) {
239          if (instr->type != nir_instr_type_intrinsic)
240             continue;
241          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
242          switch (intr->intrinsic) {
243          case nir_intrinsic_load_invocation_id: {
244             if (!loop_var) {
245                loop_var = nir_local_variable_create(patch_const_func_impl, glsl_int_type(), "PatchConstInvocId");
246                b.cursor = nir_before_cf_list(&patch_const_func_impl->body);
247                loop_var_deref = nir_build_deref_var(&b, loop_var);
248             }
249             if (!state.loop) {
250                b.cursor = state.begin_cursor = get_cursor_for_instr_without_cf(instr);
251                start_tcs_loop(&b, &state, loop_var_deref);
252             }
253             nir_ssa_def_rewrite_uses(&intr->dest.ssa, state.count);
254             break;
255          }
256          case nir_intrinsic_memory_barrier_tcs_patch:
257             /* The GL tessellation spec says:
258              * The barrier() function may only be called inside the main entry point of the tessellation control shader
259              * and may not be called in potentially divergent flow control.  In particular, barrier() may not be called
260              * inside a switch statement, in either sub-statement of an if statement, inside a do, for, or while loop,
261              * or at any point after a return statement in the function main().
262              *
263              * Therefore, we should be at function-level control flow.
264              */
265             assert(nir_cursors_equal(nir_before_instr(instr), get_cursor_for_instr_without_cf(instr)));
266             state.end_cursor = nir_before_instr(instr);
267             end_tcs_loop(&b, &state);
268             nir_instr_remove(instr);
269             break;
270          default:
271             break;
272          }
273       }
274    }
275    state.end_cursor = nir_after_block_before_jump(nir_impl_last_block(patch_const_func_impl));
276    end_tcs_loop(&b, &state);
277 }
278 
/* Parameters handed to remove_tess_level_accesses(). */
struct remove_tess_level_accesses_data {
   /* I/O location whose accesses are inspected. */
   unsigned location;
   /* Number of components that remain valid; accesses to a component index
    * greater than or equal to this value are removed.
    */
   unsigned size;
};
283 
284 static bool
remove_tess_level_accesses(nir_builder * b,nir_instr * instr,void * _data)285 remove_tess_level_accesses(nir_builder *b, nir_instr *instr, void *_data)
286 {
287    struct remove_tess_level_accesses_data *data = _data;
288    if (instr->type != nir_instr_type_intrinsic)
289       return false;
290 
291    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
292    if (intr->intrinsic != nir_intrinsic_store_output &&
293        intr->intrinsic != nir_intrinsic_load_input)
294       return false;
295 
296    nir_io_semantics io = nir_intrinsic_io_semantics(intr);
297    if (io.location != data->location)
298       return false;
299 
300    if (nir_intrinsic_component(intr) < data->size)
301       return false;
302 
303    if (intr->intrinsic == nir_intrinsic_store_output) {
304       assert(intr->src[0].is_ssa && intr->src[0].ssa->num_components == 1);
305       nir_instr_remove(instr);
306    } else {
307       b->cursor = nir_after_instr(instr);
308       assert(intr->dest.is_ssa && intr->dest.ssa.num_components == 1);
309       nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_ssa_undef(b, 1, intr->dest.ssa.bit_size));
310    }
311    return true;
312 }
313 
314 /* Update the types of the tess level variables and remove writes to removed components.
315  * GL always has a 4-component outer tess level and 2-component inner, while D3D requires
316  * the number of components to vary based on the primitive mode.
317  * The 4 and 2 is for quads, while triangles are 3 and 1, and lines are 2 and 0.
318  */
319 bool
dxil_nir_fixup_tess_level_for_domain(nir_shader * nir)320 dxil_nir_fixup_tess_level_for_domain(nir_shader *nir)
321 {
322    bool progress = false;
323    if (nir->info.tess._primitive_mode != TESS_PRIMITIVE_QUADS) {
324       nir_foreach_variable_with_modes_safe(var, nir, nir_var_shader_out | nir_var_shader_in) {
325          unsigned new_array_size = 4;
326          unsigned old_array_size = glsl_array_size(var->type);
327          if (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER) {
328             new_array_size = nir->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES ? 3 : 2;
329             assert(var->data.compact && (old_array_size == 4 || old_array_size == new_array_size));
330          } else if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER) {
331             new_array_size = nir->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES ? 1 : 0;
332             assert(var->data.compact && (old_array_size == 2 || old_array_size == new_array_size));
333          } else
334             continue;
335 
336          if (new_array_size == old_array_size)
337             continue;
338 
339          progress = true;
340          if (new_array_size)
341             var->type = glsl_array_type(glsl_float_type(), new_array_size, 0);
342          else {
343             exec_node_remove(&var->node);
344             ralloc_free(var);
345          }
346 
347          struct remove_tess_level_accesses_data pass_data = {
348             .location = var->data.location,
349             .size = new_array_size
350          };
351 
352          nir_shader_instructions_pass(nir, remove_tess_level_accesses,
353             nir_metadata_block_index | nir_metadata_dominance, &pass_data);
354       }
355    }
356    return progress;
357 }
358 
359 static bool
tcs_update_deref_input_types(nir_builder * b,nir_instr * instr,void * data)360 tcs_update_deref_input_types(nir_builder *b, nir_instr *instr, void *data)
361 {
362    if (instr->type != nir_instr_type_deref)
363       return false;
364 
365    nir_deref_instr *deref = nir_instr_as_deref(instr);
366    if (deref->deref_type != nir_deref_type_var)
367       return false;
368 
369    nir_variable *var = deref->var;
370    deref->type = var->type;
371    return true;
372 }
373 
374 bool
dxil_nir_set_tcs_patches_in(nir_shader * nir,unsigned num_control_points)375 dxil_nir_set_tcs_patches_in(nir_shader *nir, unsigned num_control_points)
376 {
377    bool progress = false;
378    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) {
379       if (nir_is_arrayed_io(var, MESA_SHADER_TESS_CTRL)) {
380          var->type = glsl_array_type(glsl_get_array_element(var->type), num_control_points, 0);
381          progress = true;
382       }
383    }
384 
385    if (progress)
386       nir_shader_instructions_pass(nir, tcs_update_deref_input_types, nir_metadata_all, NULL);
387 
388    return progress;
389 }
390