/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "nir.h"

/* This pass computes, for each ssa definition, whether it is uniform.
 * That is, whether the value is the same for all invocations
 * of the group.
 *
 * This divergence analysis pass expects the shader to be in LCSSA-form.
 *
 * This algorithm implements "The Simple Divergence Analysis" from
 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
 * Divergence Analysis.  ACM Transactions on Programming Languages and Systems (TOPLAS),
 * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
 */

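/* Illustrative example (not from the original sources): in a fragment
 * shader,
 *
 *    float s = ubo.scale;       // uniform: same value in every invocation
 *    vec4  c = texture(t, uv);  // divergent: uv varies per invocation
 *    vec4  r = c * s;           // divergent: has a divergent source
 *
 * A minimal usage sketch, assuming the caller runs the analysis before
 * inspecting defs:
 *
 *    nir_divergence_analysis(shader);
 *    if (instr->def.divergent)
 *       ...allocate a per-lane register...
 *    else
 *       ...allocate a scalar/uniform register...
 */
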
struct divergence_state {
   const gl_shader_stage stage;
   nir_shader *shader;
   nir_function_impl *impl;
   nir_divergence_options options;
   nir_loop *loop;
   bool loop_all_invariant;

   /* Whether the caller requested vertex divergence (meaning between vertices
    * of the same primitive) instead of subgroup invocation divergence
    * (between invocations of the same subgroup). For example, patch input
    * loads are always convergent, while subgroup intrinsics are divergent
    * because vertices of the same primitive can be processed by different
    * subgroups.
    */
   bool vertex_divergence;

   /** current control flow state */
   /* True if some loop-active invocations might take a different control-flow path.
    * A divergent break does not cause subsequent control-flow to be considered
    * divergent because those invocations are no longer active in the loop.
    * For a divergent if, both sides are considered divergent flow because
    * the other side is still loop-active. */
   bool divergent_loop_cf;
   /* True if a divergent continue happened since the loop header */
   bool divergent_loop_continue;
   /* True if a divergent break happened since the loop header */
   bool divergent_loop_break;

   /* True if we visit the block for the first time */
   bool first_visit;
   /* True if we visit a block that is dominated by a loop with a divergent break */
   bool consider_loop_invariance;
};

static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state);

bool
nir_src_is_divergent(nir_src *src)
{
   if (src->ssa->divergent)
      return true;

   nir_cf_node *use_node = nir_src_get_block(src)->cf_node.parent;
   nir_cf_node *def_node = src->ssa->parent_instr->block->cf_node.parent;

   /* Short-cut the common case. */
   if (def_node == use_node)
      return false;

   /* If the source was computed in a divergent loop, and is not
    * loop-invariant, then it must also be considered divergent.
    */
   bool loop_invariant = src->ssa->loop_invariant;
   while (def_node) {
      if (def_node->type == nir_cf_node_loop) {
         /* Check whether the use is inside this loop. */
         for (nir_cf_node *node = use_node; node != NULL; node = node->parent) {
            if (def_node == node)
               return false;
         }

         /* Because the use is outside of this loop, it is divergent. */
         if (nir_cf_node_as_loop(def_node)->divergent_break && !loop_invariant)
            return true;

         /* For outer loops, consider this variable not loop invariant. */
         loop_invariant = false;
      }

      def_node = def_node->parent;
   }

   return false;
}
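
/* Illustrative example: with a divergent break, invocations leave the loop
 * in different iterations, so a value defined inside the loop is divergent
 * when used outside of it, unless it is loop-invariant:
 *
 *    loop {
 *       i = phi(0, i + 1);
 *       if (divergent_cond) break;
 *    }
 *    use(i);   // divergent: each invocation exits with its own i
 */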

static inline bool
src_divergent(nir_src src, struct divergence_state *state)
{
   if (!state->consider_loop_invariance)
      return src.ssa->divergent;

   return nir_src_is_divergent(&src);
}

static inline bool
src_invariant(nir_src *src, void *loop)
{
   nir_block *first_block = nir_loop_first_block(loop);

   /* Invariant if SSA is defined before the current loop. */
   if (src->ssa->parent_instr->block->index < first_block->index)
      return true;

   if (!src->ssa->loop_invariant)
      return false;

   /* The value might be defined in a nested loop. */
   nir_cf_node *cf_node = src->ssa->parent_instr->block->cf_node.parent;
   while (cf_node->type != nir_cf_node_loop)
      cf_node = cf_node->parent;

   return nir_cf_node_as_loop(cf_node) == loop;
}
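
/* Illustrative example: with a and b defined before the loop,
 *
 *    loop {
 *       x = a + b;   // loop-invariant: all sources defined outside
 *       y = x + i;   // not invariant: i changes per iteration
 *    }
 *
 * x stays invariant (and thus can stay uniform across a divergent break),
 * while y cannot.
 */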

static bool
visit_alu(nir_alu_instr *instr, struct divergence_state *state)
{
   if (instr->def.divergent)
      return false;

   unsigned num_src = nir_op_infos[instr->op].num_inputs;

   for (unsigned i = 0; i < num_src; i++) {
      if (src_divergent(instr->src[i].src, state)) {
         instr->def.divergent = true;
         return true;
      }
   }

   return false;
}


/* On some HW, uniform loads where there is a pending store/atomic from
 * another wave can "tear", so that different invocations see the pre-store
 * value and the post-store value even though they are loading from the same
 * location. This means we have to assume the result is not uniform unless
 * the access is readonly.
 *
 * TODO: The Vulkan memory model is much stricter here and requires an
 * atomic or volatile load for the data race to be valid, which could allow
 * us to do better when it's in use; however, we currently don't have that
 * information plumbed through.
 */
static bool
load_may_tear(struct divergence_state *state, nir_intrinsic_instr *instr)
{
   return (state->options & nir_divergence_uniform_load_tears) &&
          !(nir_intrinsic_access(instr) & ACCESS_NON_WRITEABLE);
}
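
/* Illustrative scenario: every invocation of wave A loads the same SSBO
 * address while wave B concurrently stores to it. If the load tears, some
 * of A's invocations may observe the pre-store value and others the
 * post-store value, so the result must be treated as divergent. With
 * ACCESS_NON_WRITEABLE no concurrent store can exist and the load stays
 * uniform.
 */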

static bool
visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
{
   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
      return false;

   if (instr->def.divergent)
      return false;

   nir_divergence_options options = state->options;
   gl_shader_stage stage = state->stage;
   bool is_divergent = false;
   switch (instr->intrinsic) {
   case nir_intrinsic_shader_clock:
   case nir_intrinsic_ballot:
   case nir_intrinsic_ballot_relaxed:
   case nir_intrinsic_as_uniform:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_read_invocation_cond_ir3:
   case nir_intrinsic_read_getlast_ir3:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_first_invocation:
   case nir_intrinsic_last_invocation:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_shared_append_amd:
   case nir_intrinsic_shared_consume_amd:
      /* VS/TES/GS invocations of the same primitive can be in different
       * subgroups, so subgroup ops are always divergent between vertices of
       * the same primitive.
       */
      is_divergent = state->vertex_divergence;
      break;

   /* Intrinsics which are always uniform */
   case nir_intrinsic_load_preamble:
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_push_constant_zink:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_num_subgroups:
   case nir_intrinsic_load_ray_launch_size:
   case nir_intrinsic_load_sbt_base_amd:
   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_subgroup_id_shift_ir3:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_viewport_scale:
   case nir_intrinsic_load_user_clip_plane:
   case nir_intrinsic_load_viewport_x_scale:
   case nir_intrinsic_load_viewport_y_scale:
   case nir_intrinsic_load_viewport_z_scale:
   case nir_intrinsic_load_viewport_offset:
   case nir_intrinsic_load_viewport_x_offset:
   case nir_intrinsic_load_viewport_y_offset:
   case nir_intrinsic_load_viewport_z_offset:
   case nir_intrinsic_load_cull_triangle_viewport_xy_scale_and_offset_amd:
   case nir_intrinsic_load_cull_line_viewport_xy_scale_and_offset_amd:
   case nir_intrinsic_load_blend_const_color_a_float:
   case nir_intrinsic_load_blend_const_color_b_float:
   case nir_intrinsic_load_blend_const_color_g_float:
   case nir_intrinsic_load_blend_const_color_r_float:
   case nir_intrinsic_load_blend_const_color_rgba:
   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
   case nir_intrinsic_load_line_width:
   case nir_intrinsic_load_aa_line_width:
   case nir_intrinsic_load_xfb_address:
   case nir_intrinsic_load_num_vertices:
   case nir_intrinsic_load_fb_layers_v3d:
   case nir_intrinsic_load_fep_w_v3d:
   case nir_intrinsic_load_tcs_num_patches_amd:
   case nir_intrinsic_load_tcs_tess_levels_to_tes_amd:
   case nir_intrinsic_load_tcs_primitive_mode_amd:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_ring_tess_factors_amd:
   case nir_intrinsic_load_ring_tess_offchip_amd:
   case nir_intrinsic_load_ring_tess_factors_offset_amd:
   case nir_intrinsic_load_ring_tess_offchip_offset_amd:
   case nir_intrinsic_load_ring_mesh_scratch_amd:
   case nir_intrinsic_load_ring_mesh_scratch_offset_amd:
   case nir_intrinsic_load_ring_esgs_amd:
   case nir_intrinsic_load_ring_es2gs_offset_amd:
   case nir_intrinsic_load_ring_task_draw_amd:
   case nir_intrinsic_load_ring_task_payload_amd:
   case nir_intrinsic_load_sample_positions_amd:
   case nir_intrinsic_load_rasterization_samples_amd:
   case nir_intrinsic_load_ring_gsvs_amd:
   case nir_intrinsic_load_ring_gs2vs_offset_amd:
   case nir_intrinsic_load_streamout_config_amd:
   case nir_intrinsic_load_streamout_write_index_amd:
   case nir_intrinsic_load_streamout_offset_amd:
   case nir_intrinsic_load_task_ring_entry_amd:
   case nir_intrinsic_load_ring_attr_amd:
   case nir_intrinsic_load_ring_attr_offset_amd:
   case nir_intrinsic_load_provoking_vtx_amd:
   case nir_intrinsic_load_sample_positions_pan:
   case nir_intrinsic_load_workgroup_num_input_vertices_amd:
   case nir_intrinsic_load_workgroup_num_input_primitives_amd:
   case nir_intrinsic_load_pipeline_stat_query_enabled_amd:
   case nir_intrinsic_load_prim_gen_query_enabled_amd:
   case nir_intrinsic_load_prim_xfb_query_enabled_amd:
   case nir_intrinsic_load_merged_wave_info_amd:
   case nir_intrinsic_load_clamp_vertex_color_amd:
   case nir_intrinsic_load_cull_front_face_enabled_amd:
   case nir_intrinsic_load_cull_back_face_enabled_amd:
   case nir_intrinsic_load_cull_ccw_amd:
   case nir_intrinsic_load_cull_small_triangles_enabled_amd:
   case nir_intrinsic_load_cull_small_lines_enabled_amd:
   case nir_intrinsic_load_cull_any_enabled_amd:
   case nir_intrinsic_load_cull_small_triangle_precision_amd:
   case nir_intrinsic_load_cull_small_line_precision_amd:
   case nir_intrinsic_load_user_data_amd:
   case nir_intrinsic_load_force_vrs_rates_amd:
   case nir_intrinsic_load_tess_level_inner_default:
   case nir_intrinsic_load_tess_level_outer_default:
   case nir_intrinsic_load_scalar_arg_amd:
   case nir_intrinsic_load_smem_amd:
   case nir_intrinsic_load_resume_shader_address_amd:
   case nir_intrinsic_load_reloc_const_intel:
   case nir_intrinsic_load_btd_global_arg_addr_intel:
   case nir_intrinsic_load_btd_local_arg_addr_intel:
   case nir_intrinsic_load_inline_data_intel:
   case nir_intrinsic_load_ray_num_dss_rt_stacks_intel:
   case nir_intrinsic_load_lshs_vertex_stride_amd:
   case nir_intrinsic_load_esgs_vertex_stride_amd:
   case nir_intrinsic_load_hs_out_patch_data_offset_amd:
   case nir_intrinsic_load_clip_half_line_width_amd:
   case nir_intrinsic_load_num_vertices_per_primitive_amd:
   case nir_intrinsic_load_streamout_buffer_amd:
   case nir_intrinsic_load_ordered_id_amd:
   case nir_intrinsic_load_gs_wave_id_amd:
   case nir_intrinsic_load_provoking_vtx_in_prim_amd:
   case nir_intrinsic_load_lds_ngg_scratch_base_amd:
   case nir_intrinsic_load_lds_ngg_gs_out_vertex_base_amd:
   case nir_intrinsic_load_btd_shader_type_intel:
   case nir_intrinsic_load_base_global_invocation_id:
   case nir_intrinsic_load_base_workgroup_id:
   case nir_intrinsic_load_alpha_reference_amd:
   case nir_intrinsic_load_ubo_uniform_block_intel:
   case nir_intrinsic_load_ssbo_uniform_block_intel:
   case nir_intrinsic_load_shared_uniform_block_intel:
   case nir_intrinsic_load_barycentric_optimize_amd:
   case nir_intrinsic_load_poly_line_smooth_enabled:
   case nir_intrinsic_load_rasterization_primitive_amd:
   case nir_intrinsic_unit_test_uniform_amd:
   case nir_intrinsic_load_global_constant_uniform_block_intel:
   case nir_intrinsic_load_debug_log_desc_amd:
   case nir_intrinsic_load_xfb_state_address_gfx12_amd:
   case nir_intrinsic_cmat_length:
   case nir_intrinsic_load_vs_primitive_stride_ir3:
   case nir_intrinsic_load_vs_vertex_stride_ir3:
   case nir_intrinsic_load_hs_patch_stride_ir3:
   case nir_intrinsic_load_tess_factor_base_ir3:
   case nir_intrinsic_load_tess_param_base_ir3:
   case nir_intrinsic_load_primitive_location_ir3:
   case nir_intrinsic_preamble_start_ir3:
   case nir_intrinsic_optimization_barrier_sgpr_amd:
   case nir_intrinsic_load_fbfetch_image_fmask_desc_amd:
   case nir_intrinsic_load_fbfetch_image_desc_amd:
   case nir_intrinsic_load_polygon_stipple_buffer_amd:
   case nir_intrinsic_load_printf_buffer_address:
   case nir_intrinsic_load_printf_buffer_size:
   case nir_intrinsic_load_printf_base_identifier:
   case nir_intrinsic_load_core_id_agx:
   case nir_intrinsic_load_samples_log2_agx:
   case nir_intrinsic_load_active_subgroup_count_agx:
   case nir_intrinsic_load_constant_base_ptr:
      is_divergent = false;
      break;

   /* This is divergent because it specifically loads sequential values into
    * successive SIMD lanes.
    */
   case nir_intrinsic_load_global_block_intel:
      is_divergent = true;
      break;

   case nir_intrinsic_decl_reg:
      is_divergent = nir_intrinsic_divergent(instr);
      break;

   /* Intrinsics with divergence depending on shader stage and hardware */
   case nir_intrinsic_load_shader_record_ptr:
      is_divergent = !(options & nir_divergence_shader_record_ptr_uniform);
      break;
   case nir_intrinsic_load_frag_shading_rate:
      is_divergent = !(options & nir_divergence_single_frag_shading_rate_per_subgroup);
      break;
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_primitive_input:
      is_divergent = src_divergent(instr->src[0], state);

      if (stage == MESA_SHADER_FRAGMENT) {
         is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      } else if (stage == MESA_SHADER_TESS_EVAL) {
         /* Patch input loads are uniform between vertices of the same primitive. */
         if (state->vertex_divergence)
            is_divergent = false;
         else
            is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      } else {
         is_divergent = true;
      }
      break;
   case nir_intrinsic_load_attribute_pan:
      assert(stage == MESA_SHADER_VERTEX);
      is_divergent = src_divergent(instr->src[0], state) ||
                     src_divergent(instr->src[1], state) ||
                     src_divergent(instr->src[2], state);
      break;
   case nir_intrinsic_load_per_vertex_input:
      is_divergent = src_divergent(instr->src[0], state) ||
                     src_divergent(instr->src[1], state);
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_input_vertex:
      is_divergent = src_divergent(instr->src[1], state);
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_output:
      is_divergent = src_divergent(instr->src[0], state);
      switch (stage) {
      case MESA_SHADER_TESS_CTRL:
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
         break;
      case MESA_SHADER_FRAGMENT:
         is_divergent = true;
         break;
      case MESA_SHADER_TASK:
      case MESA_SHADER_MESH:
         /* NV_mesh_shader only (EXT_mesh_shader does not allow loading outputs).
          * Divergent if src[0] is, so nothing else to do.
          */
         break;
      default:
         unreachable("Invalid stage for load_output");
      }
      break;
   case nir_intrinsic_load_per_view_output:
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent ||
                     (stage == MESA_SHADER_TESS_CTRL &&
                      !(options & nir_divergence_single_patch_per_tcs_subgroup));
      break;
   case nir_intrinsic_load_per_vertex_output:
      /* TCS and NV_mesh_shader only (EXT_mesh_shader does not allow loading outputs). */
      assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH);
      is_divergent = src_divergent(instr->src[0], state) ||
                     src_divergent(instr->src[1], state) ||
                     (stage == MESA_SHADER_TESS_CTRL &&
                      !(options & nir_divergence_single_patch_per_tcs_subgroup));
      break;
   case nir_intrinsic_load_per_primitive_output:
      /* NV_mesh_shader only (EXT_mesh_shader does not allow loading outputs). */
      assert(stage == MESA_SHADER_MESH);
      is_divergent = src_divergent(instr->src[0], state) ||
                     src_divergent(instr->src[1], state);
      break;
   case nir_intrinsic_load_layer_id:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_front_face_fsign:
   case nir_intrinsic_load_back_face_agx:
      assert(stage == MESA_SHADER_FRAGMENT || state->shader->info.internal);
      is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_view_index:
      assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
      if (options & nir_divergence_view_index_uniform)
         is_divergent = false;
      else if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_fs_input_interp_deltas:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = src_divergent(instr->src[0], state);
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_instance_id:
      is_divergent = !state->vertex_divergence;
      break;
   case nir_intrinsic_load_primitive_id:
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !state->vertex_divergence &&
                        !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !state->vertex_divergence &&
                        !(options & nir_divergence_single_patch_per_tes_subgroup);
      else if (stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_VERTEX)
         is_divergent = !state->vertex_divergence;
      else if (stage == MESA_SHADER_ANY_HIT ||
               stage == MESA_SHADER_CLOSEST_HIT ||
               stage == MESA_SHADER_INTERSECTION)
         is_divergent = true;
      else
         unreachable("Invalid stage for load_primitive_id");
      break;
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer:
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         unreachable("Invalid stage for load_primitive_tess_level_*");
      break;

   case nir_intrinsic_load_workgroup_index:
   case nir_intrinsic_load_workgroup_id:
      assert(gl_shader_stage_uses_workgroup(stage) || stage == MESA_SHADER_TESS_CTRL);
      if (stage == MESA_SHADER_COMPUTE)
         is_divergent |= (options & nir_divergence_multiple_workgroup_per_compute_subgroup);
      break;

   /* Clustered reductions are uniform if cluster_size == subgroup_size,
    * or if the source is uniform and the operation is invariant.
    * Inclusive scans are uniform if the source is uniform and the
    * operation is invariant.
    */
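   /* Illustrative example: for a uniform x, a clustered iand reduction
    * yields x in every cluster, but a clustered iadd reduction yields
    * x * (active invocations in the cluster), which can differ between
    * clusters when some invocations are inactive.
    */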
   case nir_intrinsic_reduce:
      if (nir_intrinsic_cluster_size(instr) == 0) {
         /* Cluster size of 0 means the subgroup size.
          * This is uniform within a subgroup, but divergent between
          * vertices of the same primitive because they may be in
          * different subgroups.
          */
         is_divergent = state->vertex_divergence;
         break;
      }
      FALLTHROUGH;
   case nir_intrinsic_inclusive_scan:
   case nir_intrinsic_inclusive_scan_clusters_ir3: {
      nir_op op = nir_intrinsic_reduction_op(instr);
      is_divergent = src_divergent(instr->src[0], state) ||
                     state->vertex_divergence;
      if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
          op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
          op != nir_op_iand && op != nir_op_ior)
         is_divergent = true;
      break;
   }

   case nir_intrinsic_reduce_clusters_ir3:
      /* This reduces the last invocations in all 8-wide clusters. It should
       * behave the same as reduce with cluster_size == subgroup_size.
       */
      is_divergent = state->vertex_divergence;
      break;

   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ubo_vec4:
   case nir_intrinsic_ldc_nv:
   case nir_intrinsic_ldcx_nv:
      is_divergent = (src_divergent(instr->src[0], state) &&
                      (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     src_divergent(instr->src[1], state);
      break;

   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_ssbo_ir3:
   case nir_intrinsic_load_uav_ir3:
      is_divergent = (src_divergent(instr->src[0], state) &&
                      (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     src_divergent(instr->src[1], state) ||
                     load_may_tear(state, instr);
      break;

   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_shared_ir3:
      is_divergent = src_divergent(instr->src[0], state) ||
                     (options & nir_divergence_uniform_load_tears);
      break;

   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_2x32:
   case nir_intrinsic_load_global_ir3:
   case nir_intrinsic_load_deref: {
      if (load_may_tear(state, instr)) {
         is_divergent = true;
         break;
      }

      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
      for (unsigned i = 0; i < num_srcs; i++) {
         if (src_divergent(instr->src[i], state)) {
            is_divergent = true;
            break;
         }
      }
      break;
   }

   case nir_intrinsic_get_ssbo_size:
   case nir_intrinsic_deref_buffer_array_length:
      is_divergent = src_divergent(instr->src[0], state) &&
                     (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
      break;

   case nir_intrinsic_image_samples_identical:
   case nir_intrinsic_image_deref_samples_identical:
   case nir_intrinsic_bindless_image_samples_identical:
   case nir_intrinsic_image_fragment_mask_load_amd:
   case nir_intrinsic_image_deref_fragment_mask_load_amd:
   case nir_intrinsic_bindless_image_fragment_mask_load_amd:
      is_divergent = (src_divergent(instr->src[0], state) &&
                      (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     src_divergent(instr->src[1], state) ||
                     load_may_tear(state, instr);
      break;

   case nir_intrinsic_image_texel_address:
   case nir_intrinsic_image_deref_texel_address:
   case nir_intrinsic_bindless_image_texel_address:
      is_divergent = (src_divergent(instr->src[0], state) &&
                      (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     src_divergent(instr->src[1], state) ||
                     src_divergent(instr->src[2], state);
      break;

   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_sparse_load:
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_bindless_image_sparse_load:
      is_divergent = (src_divergent(instr->src[0], state) &&
                      (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     src_divergent(instr->src[1], state) ||
                     src_divergent(instr->src[2], state) ||
                     src_divergent(instr->src[3], state) ||
                     load_may_tear(state, instr);
      break;

   case nir_intrinsic_optimization_barrier_vgpr_amd:
      is_divergent = src_divergent(instr->src[0], state);
      break;

   /* Intrinsics with divergence depending on sources */
   case nir_intrinsic_convert_alu_types:
   case nir_intrinsic_ddx:
   case nir_intrinsic_ddx_fine:
   case nir_intrinsic_ddx_coarse:
   case nir_intrinsic_ddy:
   case nir_intrinsic_ddy_fine:
   case nir_intrinsic_ddy_coarse:
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_rotate:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_shuffle_xor_uniform_ir3:
   case nir_intrinsic_shuffle_up_uniform_ir3:
   case nir_intrinsic_shuffle_down_uniform_ir3:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_quad_vote_any:
   case nir_intrinsic_quad_vote_all:
   case nir_intrinsic_load_shared2_amd:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_global_amd:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_sample_pos_from_id:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_load_task_payload:
   case nir_intrinsic_load_buffer_amd:
   case nir_intrinsic_load_typed_buffer_amd:
   case nir_intrinsic_image_levels:
   case nir_intrinsic_image_deref_levels:
   case nir_intrinsic_bindless_image_levels:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_image_descriptor_amd:
   case nir_intrinsic_image_deref_descriptor_amd:
   case nir_intrinsic_bindless_image_descriptor_amd:
   case nir_intrinsic_strict_wqm_coord_amd:
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_vulkan_resource_index:
   case nir_intrinsic_vulkan_resource_reindex:
   case nir_intrinsic_load_vulkan_descriptor:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_read_deref:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd:
   case nir_intrinsic_is_sparse_texels_resident:
   case nir_intrinsic_is_sparse_resident_zink:
   case nir_intrinsic_sparse_residency_code_and:
   case nir_intrinsic_bvh64_intersect_ray_amd:
   case nir_intrinsic_image_deref_load_param_intel:
   case nir_intrinsic_image_load_raw_intel:
   case nir_intrinsic_get_ubo_size:
   case nir_intrinsic_load_ssbo_address:
   case nir_intrinsic_load_global_constant_bounded:
   case nir_intrinsic_load_global_constant_offset:
   case nir_intrinsic_load_reg:
   case nir_intrinsic_load_constant_agx:
   case nir_intrinsic_load_reg_indirect:
   case nir_intrinsic_load_const_ir3:
   case nir_intrinsic_load_frag_size_ir3:
   case nir_intrinsic_load_frag_offset_ir3:
   case nir_intrinsic_bindless_resource_ir3:
   case nir_intrinsic_ray_intersection_ir3: {
      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
      for (unsigned i = 0; i < num_srcs; i++) {
         if (src_divergent(instr->src[i], state)) {
            is_divergent = true;
            break;
         }
      }
      break;
   }

   case nir_intrinsic_resource_intel:
      /* Not having the non_uniform flag with divergent sources is undefined
       * behavior. The Intel driver defines it to pick the lowest numbered
       * live SIMD lane (via emit_uniformize).
       */
      if ((nir_intrinsic_resource_access_intel(instr) &
           nir_resource_intel_non_uniform) != 0) {
         unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
         for (unsigned i = 0; i < num_srcs; i++) {
            if (src_divergent(instr->src[i], state)) {
               is_divergent = true;
               break;
            }
         }
      }
      break;

   case nir_intrinsic_shuffle:
      is_divergent = src_divergent(instr->src[0], state) &&
                     src_divergent(instr->src[1], state);
      break;

   case nir_intrinsic_load_param:
      is_divergent =
         !state->impl->function->params[nir_intrinsic_param_idx(instr)].is_uniform;
      break;

   /* Intrinsics which are always divergent */
   case nir_intrinsic_inverse_ballot:
   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_point_coord_maybe_flipped:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_sample:
   case nir_intrinsic_load_barycentric_model:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_load_barycentric_at_offset_nv:
   case nir_intrinsic_load_barycentric_coord_pixel:
   case nir_intrinsic_load_barycentric_coord_centroid:
   case nir_intrinsic_load_barycentric_coord_sample:
   case nir_intrinsic_load_barycentric_coord_at_sample:
   case nir_intrinsic_load_barycentric_coord_at_offset:
   case nir_intrinsic_load_persp_center_rhw_ir3:
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_vertex:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_tess_coord_xy:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_line_coord:
   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_frag_coord_zw:
   case nir_intrinsic_load_frag_coord_zw_pan:
   case nir_intrinsic_load_frag_coord_unscaled_ir3:
   case nir_intrinsic_load_pixel_coord:
   case nir_intrinsic_load_fully_covered:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_pos_or_center:
   case nir_intrinsic_load_vertex_id_zero_base:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_global_invocation_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_is_helper_invocation:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_deref_atomic:
   case nir_intrinsic_deref_atomic_swap:
   case nir_intrinsic_ssbo_atomic:
   case nir_intrinsic_ssbo_atomic_swap:
   case nir_intrinsic_ssbo_atomic_ir3:
   case nir_intrinsic_ssbo_atomic_swap_ir3:
   case nir_intrinsic_image_deref_atomic:
   case nir_intrinsic_image_deref_atomic_swap:
   case nir_intrinsic_image_atomic:
   case nir_intrinsic_image_atomic_swap:
   case nir_intrinsic_bindless_image_atomic:
   case nir_intrinsic_bindless_image_atomic_swap:
   case nir_intrinsic_shared_atomic:
   case nir_intrinsic_shared_atomic_swap:
   case nir_intrinsic_task_payload_atomic:
   case nir_intrinsic_task_payload_atomic_swap:
   case nir_intrinsic_global_atomic:
   case nir_intrinsic_global_atomic_swap:
   case nir_intrinsic_global_atomic_amd:
   case nir_intrinsic_global_atomic_agx:
   case nir_intrinsic_global_atomic_swap_amd:
   case nir_intrinsic_global_atomic_swap_agx:
   case nir_intrinsic_global_atomic_2x32:
   case nir_intrinsic_global_atomic_swap_2x32:
   case nir_intrinsic_global_atomic_ir3:
   case nir_intrinsic_global_atomic_swap_ir3:
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_add_deref:
   case nir_intrinsic_atomic_counter_min_deref:
   case nir_intrinsic_atomic_counter_max_deref:
   case nir_intrinsic_atomic_counter_and_deref:
   case nir_intrinsic_atomic_counter_or_deref:
   case nir_intrinsic_atomic_counter_xor_deref:
   case nir_intrinsic_atomic_counter_inc_deref:
   case nir_intrinsic_atomic_counter_pre_dec_deref:
   case nir_intrinsic_atomic_counter_post_dec_deref:
   case nir_intrinsic_atomic_counter_exchange_deref:
   case nir_intrinsic_atomic_counter_comp_swap_deref:
   case nir_intrinsic_exclusive_scan:
   case nir_intrinsic_exclusive_scan_clusters_ir3:
   case nir_intrinsic_ballot_bit_count_exclusive:
   case nir_intrinsic_ballot_bit_count_inclusive:
   case nir_intrinsic_write_invocation_amd:
   case nir_intrinsic_mbcnt_amd:
   case nir_intrinsic_lane_permute_16_amd:
   case nir_intrinsic_dpp16_shift_amd:
   case nir_intrinsic_elect:
   case nir_intrinsic_elect_any_ir3:
   case nir_intrinsic_load_tlb_color_brcm:
   case nir_intrinsic_load_tess_rel_patch_id_amd:
   case nir_intrinsic_load_gs_vertex_offset_amd:
   case nir_intrinsic_is_subgroup_invocation_lt_amd:
   case nir_intrinsic_load_packed_passthrough_primitive_amd:
   case nir_intrinsic_load_initial_edgeflags_amd:
   case nir_intrinsic_gds_atomic_add_amd:
   case nir_intrinsic_load_rt_arg_scratch_offset_amd:
   case nir_intrinsic_load_intersection_opaque_amd:
   case nir_intrinsic_load_vector_arg_amd:
   case nir_intrinsic_load_btd_stack_id_intel:
   case nir_intrinsic_load_topology_id_intel:
   case nir_intrinsic_load_scratch_base_ptr:
   case nir_intrinsic_ordered_xfb_counter_add_gfx11_amd:
   case nir_intrinsic_ordered_add_loop_gfx12_amd:
   case nir_intrinsic_xfb_counter_sub_gfx11_amd:
   case nir_intrinsic_unit_test_divergent_amd:
   case nir_intrinsic_load_stack:
   case nir_intrinsic_load_ray_launch_id:
   case nir_intrinsic_load_ray_instance_custom_index:
   case nir_intrinsic_load_ray_geometry_index:
   case nir_intrinsic_load_ray_world_direction:
   case nir_intrinsic_load_ray_world_origin:
   case nir_intrinsic_load_ray_object_origin:
   case nir_intrinsic_load_ray_object_direction:
   case nir_intrinsic_load_ray_t_min:
   case nir_intrinsic_load_ray_t_max:
   case nir_intrinsic_load_ray_object_to_world:
   case nir_intrinsic_load_ray_world_to_object:
   case nir_intrinsic_load_ray_hit_kind:
   case nir_intrinsic_load_ray_flags:
   case nir_intrinsic_load_cull_mask:
   case nir_intrinsic_load_sysval_nv:
   case nir_intrinsic_emit_vertex_nv:
   case nir_intrinsic_end_primitive_nv:
   case nir_intrinsic_report_ray_intersection:
   case nir_intrinsic_rq_proceed:
   case nir_intrinsic_rq_load:
   case nir_intrinsic_load_ray_triangle_vertex_positions:
   case nir_intrinsic_cmat_extract:
   case nir_intrinsic_cmat_muladd_amd:
   case nir_intrinsic_dpas_intel:
   case nir_intrinsic_isberd_nv:
   case nir_intrinsic_al2p_nv:
   case nir_intrinsic_ald_nv:
   case nir_intrinsic_ipa_nv:
   case nir_intrinsic_ldtram_nv:
   case nir_intrinsic_printf:
   case nir_intrinsic_load_gs_header_ir3:
   case nir_intrinsic_load_tcs_header_ir3:
   case nir_intrinsic_load_rel_patch_id_ir3:
   case nir_intrinsic_brcst_active_ir3:
   case nir_intrinsic_load_helper_op_id_agx:
   case nir_intrinsic_load_helper_arg_lo_agx:
   case nir_intrinsic_load_helper_arg_hi_agx:
   case nir_intrinsic_stack_map_agx:
   case nir_intrinsic_stack_unmap_agx:
   case nir_intrinsic_load_exported_agx:
   case nir_intrinsic_load_local_pixel_agx:
   case nir_intrinsic_load_coefficients_agx:
   case nir_intrinsic_load_active_subgroup_invocation_agx:
   case nir_intrinsic_load_sample_mask:
   case nir_intrinsic_quad_ballot_agx:
   case nir_intrinsic_load_agx:
      is_divergent = true;
      break;

   default:
#ifdef NDEBUG
      is_divergent = true;
      break;
#else
      nir_print_instr(&instr->instr, stderr);
      unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
#endif
   }

   instr->def.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_tex(nir_tex_instr *instr, struct divergence_state *state)
{
   if (instr->def.divergent)
      return false;

   bool is_divergent = false;

   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_sampler_deref:
      case nir_tex_src_sampler_handle:
      case nir_tex_src_sampler_offset:
         is_divergent |= src_divergent(instr->src[i].src, state) &&
                         instr->sampler_non_uniform;
         break;
      case nir_tex_src_texture_deref:
      case nir_tex_src_texture_handle:
      case nir_tex_src_texture_offset:
         is_divergent |= src_divergent(instr->src[i].src, state) &&
                         instr->texture_non_uniform;
         break;
      default:
         is_divergent |= src_divergent(instr->src[i].src, state);
         break;
      }
   }

   instr->def.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_def(nir_def *def, struct divergence_state *state)
{
   return false;
}

static bool
nir_variable_mode_is_uniform(nir_variable_mode mode)
{
   switch (mode) {
   case nir_var_uniform:
   case nir_var_mem_ubo:
   case nir_var_mem_ssbo:
   case nir_var_mem_shared:
   case nir_var_mem_task_payload:
   case nir_var_mem_global:
   case nir_var_image:
      return true;
   default:
      return false;
   }
}

static bool
nir_variable_is_uniform(nir_shader *shader, nir_variable *var,
                        struct divergence_state *state)
{
   if (nir_variable_mode_is_uniform(var->data.mode))
      return true;

   /* Handle system value variables. */
   if (var->data.mode == nir_var_system_value) {
      /* Fake the instruction to reuse visit_intrinsic for all sysvals. */
      nir_intrinsic_instr fake_instr;

      memset(&fake_instr, 0, sizeof(fake_instr));
      fake_instr.intrinsic =
         nir_intrinsic_from_system_value(var->data.location);

      visit_intrinsic(&fake_instr, state);
      return !fake_instr.def.divergent;
   }

   nir_divergence_options options = state->options;
   gl_shader_stage stage = shader->info.stage;

   if (stage == MESA_SHADER_FRAGMENT &&
       (options & nir_divergence_single_prim_per_subgroup) &&
       var->data.mode == nir_var_shader_in &&
       var->data.interpolation == INTERP_MODE_FLAT)
      return true;

   if (stage == MESA_SHADER_TESS_CTRL &&
       (options & nir_divergence_single_patch_per_tcs_subgroup) &&
       var->data.mode == nir_var_shader_out && var->data.patch)
      return true;

   if (stage == MESA_SHADER_TESS_EVAL &&
       (options & nir_divergence_single_patch_per_tes_subgroup) &&
       var->data.mode == nir_var_shader_in && var->data.patch)
      return true;

   return false;
}

static bool
visit_deref(nir_shader *shader, nir_deref_instr *deref,
            struct divergence_state *state)
{
   if (deref->def.divergent)
      return false;

   bool is_divergent = false;
   switch (deref->deref_type) {
   case nir_deref_type_var:
      is_divergent = !nir_variable_is_uniform(shader, deref->var, state);
      break;
   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      is_divergent = src_divergent(deref->arr.index, state);
      FALLTHROUGH;
   case nir_deref_type_struct:
   case nir_deref_type_array_wildcard:
      is_divergent |= src_divergent(deref->parent, state);
      break;
   case nir_deref_type_cast:
      is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) ||
                     src_divergent(deref->parent, state);
      break;
   }

   deref->def.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_jump(nir_jump_instr *jump, struct divergence_state *state)
{
   switch (jump->type) {
   case nir_jump_continue:
      if (state->divergent_loop_continue)
         return false;
      if (state->divergent_loop_cf)
         state->divergent_loop_continue = true;
      return state->divergent_loop_continue;
   case nir_jump_break:
      if (state->divergent_loop_break)
         return false;
      if (state->divergent_loop_cf)
         state->divergent_loop_break = true;
      return state->divergent_loop_break;
   case nir_jump_halt:
      /* This totally kills invocations so it doesn't add divergence */
      break;
   case nir_jump_return:
      unreachable("NIR divergence analysis: Unsupported return instruction.");
      break;
   case nir_jump_goto:
   case nir_jump_goto_if:
      unreachable("NIR divergence analysis: Unsupported goto_if instruction.");
      break;
   }
   return false;
}

static bool
set_ssa_def_not_divergent(nir_def *def, void *invariant)
{
   def->divergent = false;
   def->loop_invariant = *(bool *)invariant;
   return true;
}

static bool
instr_is_loop_invariant(nir_instr *instr, struct divergence_state *state)
{
   if (!state->loop)
      return false;

   switch (instr->type) {
   case nir_instr_type_load_const:
   case nir_instr_type_undef:
   case nir_instr_type_debug_info:
   case nir_instr_type_jump:
      return true;
   case nir_instr_type_intrinsic:
      if (!nir_intrinsic_can_reorder(nir_instr_as_intrinsic(instr)))
         return false;
      FALLTHROUGH;
   case nir_instr_type_alu:
   case nir_instr_type_deref:
   case nir_instr_type_tex:
      return nir_foreach_src(instr, src_invariant, state->loop);
   case nir_instr_type_call:
      return false;
   case nir_instr_type_phi:
   case nir_instr_type_parallel_copy:
   default:
      unreachable("NIR divergence analysis: Unsupported instruction type.");
   }
}

static bool
update_instr_divergence(nir_instr *instr, struct divergence_state *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return visit_alu(nir_instr_as_alu(instr), state);
   case nir_instr_type_intrinsic:
      return visit_intrinsic(nir_instr_as_intrinsic(instr), state);
   case nir_instr_type_tex:
      return visit_tex(nir_instr_as_tex(instr), state);
   case nir_instr_type_load_const:
      return visit_def(&nir_instr_as_load_const(instr)->def, state);
   case nir_instr_type_undef:
      return visit_def(&nir_instr_as_undef(instr)->def, state);
   case nir_instr_type_deref:
      return visit_deref(state->shader, nir_instr_as_deref(instr), state);
   case nir_instr_type_debug_info:
      return false;
   case nir_instr_type_call:
      return false;
   case nir_instr_type_jump:
   case nir_instr_type_phi:
   case nir_instr_type_parallel_copy:
   default:
      unreachable("NIR divergence analysis: Unsupported instruction type.");
   }
}

static bool
visit_block(nir_block *block, struct divergence_state *state)
{
   bool has_changed = false;

   nir_foreach_instr(instr, block) {
      /* phis are handled when processing the branches */
      if (instr->type == nir_instr_type_phi)
         continue;

      if (state->first_visit) {
         bool invariant = state->loop_all_invariant || instr_is_loop_invariant(instr, state);
         nir_foreach_def(instr, set_ssa_def_not_divergent, &invariant);
      }

      if (instr->type == nir_instr_type_jump) {
         has_changed |= visit_jump(nir_instr_as_jump(instr), state);
      } else {
         has_changed |= update_instr_divergence(instr, state);
      }
   }

   bool divergent = state->divergent_loop_cf ||
                    state->divergent_loop_continue ||
                    state->divergent_loop_break;
   if (divergent != block->divergent) {
      block->divergent = divergent;
      has_changed = true;
   }

   return has_changed;
}

/* There are 3 types of phi instructions:
 * (1) gamma: represent the joining point of different paths
 *     created by an “if-then-else” branch.
 *     The resulting value is divergent if the branch condition
 *     or any of the source values is divergent. */
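/* Illustrative example:
 *
 *    if (cond) { a0 = ...; } else { a1 = ...; }
 *    a = phi(a0, a1);
 *
 * a is divergent if cond is divergent (invocations join from different
 * paths) or if a0 or a1 is divergent.
 */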
static bool
visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent, bool ignore_undef)
{
   if (phi->def.divergent)
      return false;

   unsigned defined_srcs = 0;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (nir_src_is_divergent(&src->src)) {
         phi->def.divergent = true;
         return true;
      }
      if (src->src.ssa->parent_instr->type != nir_instr_type_undef) {
         defined_srcs++;
      }
   }

   if (!(ignore_undef && defined_srcs <= 1) && if_cond_divergent) {
      phi->def.divergent = true;
      return true;
   }

   return false;
}

/* There are 3 types of phi instructions:
 * (2) mu: these only exist at loop headers and
 *     merge initial and loop-carried values.
 *     The resulting value is divergent if any source value
 *     is divergent or a divergent loop continue condition
 *     is associated with a different ssa-def. */
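/* Illustrative example (hypothetical defs va/vb):
 *
 *    loop {
 *       x = phi(preheader: 0, cont_a: va, cont_b: vb);
 *       ...
 *    }
 *
 * With a divergent continue, some invocations reach the header via cont_a
 * and others via cont_b in the same iteration, so x is divergent whenever
 * va and vb are different ssa-defs, even if each is uniform by itself.
 */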
static bool
visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
{
   if (phi->def.divergent)
      return false;

   nir_def *same = NULL;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (nir_src_is_divergent(&src->src)) {
         phi->def.divergent = true;
         return true;
      }
      /* if this loop is uniform, we're done here */
      if (!divergent_continue)
         continue;
      /* skip the loop preheader */
      if (src->pred == preheader)
         continue;

      /* check if all loop-carried values are from the same ssa-def */
      if (!same)
         same = src->src.ssa;
      else if (same != src->src.ssa) {
         phi->def.divergent = true;
         return true;
      }
   }

   return false;
}

/* There are 3 types of phi instructions:
 * (3) eta: represent values that leave a loop.
 *     The resulting value is divergent if the source value is divergent
 *     or any loop exit condition is divergent for a value which is
 *     not loop-invariant (see nir_src_is_divergent()).
 */
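/* Illustrative example:
 *
 *    loop {
 *       v = f(v);                   // changes every iteration
 *       if (divergent_cond) break;
 *    }
 *    out = phi(v);                  // eta-phi (LCSSA)
 *
 * out is divergent: invocations break in different iterations and thus
 * carry different values of v out of the loop. If v were loop-invariant,
 * out would remain uniform despite the divergent break.
 */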
static bool
visit_loop_exit_phi(nir_phi_instr *phi, nir_loop *loop)
{
   if (phi->def.divergent)
      return false;

   nir_def *same = NULL;
   nir_foreach_phi_src(src, phi) {
      /* If any loop exit condition is divergent and this value is not loop
       * invariant, or if the source value is divergent, then the resulting
       * value is divergent.
       */
      if ((loop->divergent_break && !src_invariant(&src->src, loop)) ||
          nir_src_is_divergent(&src->src)) {
         phi->def.divergent = true;
         return true;
      }

      /* if this loop is uniform, we're done here */
      if (!loop->divergent_break)
         continue;

      /* check if all loop-exit values are from the same ssa-def */
      if (!same)
         same = src->src.ssa;
      else if (same != src->src.ssa) {
         phi->def.divergent = true;
         return true;
      }
   }

   return false;
}

static bool
visit_if(nir_if *if_stmt, struct divergence_state *state)
{
   bool progress = false;
   bool cond_divergent = src_divergent(if_stmt->condition, state);

   struct divergence_state then_state = *state;
   then_state.divergent_loop_cf |= cond_divergent;
   progress |= visit_cf_list(&if_stmt->then_list, &then_state);

   struct divergence_state else_state = *state;
   else_state.divergent_loop_cf |= cond_divergent;
   progress |= visit_cf_list(&if_stmt->else_list, &else_state);

   /* handle phis after the IF */
   bool invariant = state->loop && src_invariant(&if_stmt->condition, state->loop);
   nir_foreach_phi(phi, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
      if (state->first_visit) {
         phi->def.divergent = false;
         phi->def.loop_invariant =
            invariant && nir_foreach_src(&phi->instr, src_invariant, state->loop);
      }
      bool ignore_undef = state->options & nir_divergence_ignore_undef_if_phi_srcs;
      progress |= visit_if_merge_phi(phi, cond_divergent, ignore_undef);
   }

   /* join loop divergence information from both branch legs */
   state->divergent_loop_continue |= then_state.divergent_loop_continue ||
                                     else_state.divergent_loop_continue;
   state->divergent_loop_break |= then_state.divergent_loop_break ||
                                  else_state.divergent_loop_break;

1335    /* A divergent continue makes succeeding loop CF divergent:
1336     * not all loop-active invocations participate in the remaining loop-body
1337     * which means that a following break might be taken by some invocations, only */
1338    state->divergent_loop_cf |= state->divergent_loop_continue;
1339 
1340    state->consider_loop_invariance |= then_state.consider_loop_invariance ||
1341                                       else_state.consider_loop_invariance;
1342 
1343    return progress;
1344 }
1345 
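/* Illustrative sketch, not part of the pass (names are made up): why a
 * divergent continue taints later, otherwise uniform, loop control flow:
 *
 *    loop {
 *       if (%divergent_cond)
 *          continue;
 *       if (%uniform_cond)
 *          break;
 *    }
 *
 * Only the invocations that did not take the continue can reach the break,
 * so the break exits a subset of the loop-active invocations even though
 * its condition is uniform. */
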
static bool
visit_loop(nir_loop *loop, struct divergence_state *state)
{
   assert(!nir_loop_has_continue_construct(loop));
   bool progress = false;
   nir_block *loop_header = nir_loop_first_block(loop);
   nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);

   /* handle loop header phis first: we have no knowledge yet about
    * the loop's control flow or any loop-carried sources. */
   nir_foreach_phi(phi, loop_header) {
      if (!state->first_visit && phi->def.divergent)
         continue;

      phi->def.loop_invariant = false;
      nir_foreach_phi_src(src, phi) {
         if (src->pred == loop_preheader) {
            phi->def.divergent = nir_src_is_divergent(&src->src);
            break;
         }
      }
      progress |= phi->def.divergent;
   }

   /* setup loop state */
   struct divergence_state loop_state = *state;
   loop_state.loop = loop;
   loop_state.loop_all_invariant = loop_header->predecessors->entries == 1;
   loop_state.divergent_loop_cf = false;
   loop_state.divergent_loop_continue = false;
   loop_state.divergent_loop_break = false;

   /* Process the loop body until no further changes are made. This reaches
    * a fixed point because divergence flags only ever flip from false to
    * true. */
   bool repeat;
   do {
      progress |= visit_cf_list(&loop->body, &loop_state);
      repeat = false;

      /* revisit loop header phis to see if something has changed */
      nir_foreach_phi(phi, loop_header) {
         repeat |= visit_loop_header_phi(phi, loop_preheader,
                                         loop_state.divergent_loop_continue);
      }

      loop_state.divergent_loop_cf = false;
      loop_state.first_visit = false;
   } while (repeat);

   loop->divergent_continue = loop_state.divergent_loop_continue;
   loop->divergent_break = loop_state.divergent_loop_break;

   /* handle phis after the loop */
   nir_foreach_phi(phi, nir_cf_node_cf_tree_next(&loop->cf_node)) {
      if (state->first_visit) {
         phi->def.divergent = false;
         phi->def.loop_invariant = false;
      }
      progress |= visit_loop_exit_phi(phi, loop);
   }

   state->consider_loop_invariance |= loop_state.consider_loop_invariance ||
                                      loop->divergent_break;
   return progress;
}

static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state)
{
   bool has_changed = false;

   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         has_changed |= visit_block(nir_cf_node_as_block(node), state);
         break;
      case nir_cf_node_if:
         has_changed |= visit_if(nir_cf_node_as_if(node), state);
         break;
      case nir_cf_node_loop:
         has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
         break;
      case nir_cf_node_function:
         unreachable("NIR divergence analysis: Unsupported cf_node type.");
      }
   }

   return has_changed;
}

void
nir_divergence_analysis_impl(nir_function_impl *impl, nir_divergence_options options)
{
   nir_metadata_require(impl, nir_metadata_block_index);

   struct divergence_state state = {
      .stage = impl->function->shader->info.stage,
      .shader = impl->function->shader,
      .impl = impl,
      .options = options,
      .loop = NULL,
      .loop_all_invariant = false,
      .divergent_loop_cf = false,
      .divergent_loop_continue = false,
      .divergent_loop_break = false,
      .first_visit = true,
   };

   visit_cf_list(&impl->body, &state);

   nir_metadata_preserve(impl, nir_metadata_all);
}

void
nir_divergence_analysis(nir_shader *shader)
{
   shader->info.divergence_analysis_run = true;
   nir_foreach_function_impl(impl, shader) {
      nir_divergence_analysis_impl(impl,
                                   shader->options->divergence_analysis_options);
   }
}

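/* Usage sketch, illustrative only (not code from this file): a pass that
 * needs per-def divergence typically runs the analysis once and then reads
 * the cached flag on each def, along the lines of
 *
 *    nir_divergence_analysis(shader);
 *    nir_foreach_function_impl(impl, shader) {
 *       nir_foreach_block(block, impl) {
 *          nir_foreach_instr(instr, block) {
 *             nir_def *def = nir_instr_def(instr);
 *             if (def && def->divergent)
 *                ...;   // treat this value as divergent
 *          }
 *       }
 *    }
 *
 * The cached flags are only meaningful until a later pass changes the IR. */
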
/* Compute divergence between vertices of the same primitive. This uses
 * the same divergent field in nir_def and nir_loop as the regular divergence
 * pass, so info.divergence_analysis_run is cleared: after this pass, those
 * fields no longer describe subgroup divergence.
 */
void
nir_vertex_divergence_analysis(nir_shader *shader)
{
   shader->info.divergence_analysis_run = false;

   struct divergence_state state = {
      .stage = shader->info.stage,
      .shader = shader,
      .options = shader->options->divergence_analysis_options,
      .loop = NULL,
      .loop_all_invariant = false,
      .vertex_divergence = true,
      .first_visit = true,
   };

   nir_foreach_function_impl(impl, shader) {
      nir_metadata_require(impl, nir_metadata_block_index);
      state.impl = impl;
      visit_cf_list(&impl->body, &state);
      nir_metadata_preserve(impl, nir_metadata_all);
   }
}

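/* Usage sketch, illustrative only: because both passes share the same flags,
 * a caller that needs vertex divergence temporarily might do
 *
 *    nir_vertex_divergence_analysis(shader);
 *    ...read def->divergent as per-vertex divergence...
 *    nir_divergence_analysis(shader);   // restore subgroup-level results
 */
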
/* Returns whether any top-level loop of the entrypoint has a divergent
 * break. Nested loops are not inspected. */
bool
nir_has_divergent_loop(nir_shader *shader)
{
   nir_function_impl *func = nir_shader_get_entrypoint(shader);

   foreach_list_typed(nir_cf_node, node, node, &func->body) {
      if (node->type == nir_cf_node_loop) {
         if (nir_cf_node_as_loop(node)->divergent_break)
            return true;
      }
   }

   return false;
}