/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "nir.h"

/* This pass computes, for each SSA definition, whether it is uniform,
 * i.e. whether it has the same value for all invocations of the group.
 *
 * This divergence analysis pass expects the shader to be in LCSSA form.
 *
 * The algorithm implements "The Simple Divergence Analysis" from
 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
 * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS),
 * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
 */
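
/* Illustrative example (not from the paper): in a fragment shader,
 *
 *    a = load_push_constant ...   -> uniform: same value in all invocations
 *    b = load_frag_coord          -> divergent: varies per invocation
 *    c = fadd a, b                -> divergent: inherited from any source
 *
 * The result is stored in nir_ssa_def::divergent.
 */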

struct divergence_state {
   const gl_shader_stage stage;
   nir_shader *shader;

   /** current control flow state */
   /* True if some loop-active invocations might take a different control-flow path.
    * A divergent break does not cause subsequent control-flow to be considered
    * divergent because those invocations are no longer active in the loop.
    * For a divergent if, both sides are considered divergent flow because
    * the other side is still loop-active. */
   bool divergent_loop_cf;
   /* True if a divergent continue happened since the loop header */
   bool divergent_loop_continue;
   /* True if a divergent break happened since the loop header */
   bool divergent_loop_break;

   /* True if we visit the block for the first time */
   bool first_visit;
};

static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state);

static bool
visit_alu(nir_alu_instr *instr)
{
   if (instr->dest.dest.ssa.divergent)
      return false;

   unsigned num_src = nir_op_infos[instr->op].num_inputs;

   for (unsigned i = 0; i < num_src; i++) {
      if (instr->src[i].src.ssa->divergent) {
         instr->dest.dest.ssa.divergent = true;
         return true;
      }
   }

   return false;
}
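
/* Note that the propagation above is purely structural: e.g. a bcsel whose
 * condition is divergent but whose two data sources are the same value is
 * still marked divergent, because only the divergence of the sources is
 * inspected, never the values themselves. */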

static bool
visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
{
   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
      return false;

   if (instr->dest.ssa.divergent)
      return false;

   nir_divergence_options options = shader->options->divergence_analysis_options;
   gl_shader_stage stage = shader->info.stage;
   bool is_divergent = false;
   switch (instr->intrinsic) {
   /* Intrinsics which are always uniform */
   case nir_intrinsic_shader_clock:
   case nir_intrinsic_ballot:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_num_subgroups:
   case nir_intrinsic_load_ray_launch_size:
   case nir_intrinsic_load_ray_launch_size_addr_amd:
   case nir_intrinsic_load_sbt_base_amd:
   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_first_invocation:
   case nir_intrinsic_last_invocation:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_viewport_scale:
   case nir_intrinsic_load_user_clip_plane:
   case nir_intrinsic_load_viewport_x_scale:
   case nir_intrinsic_load_viewport_y_scale:
   case nir_intrinsic_load_viewport_z_scale:
   case nir_intrinsic_load_viewport_offset:
   case nir_intrinsic_load_viewport_x_offset:
   case nir_intrinsic_load_viewport_y_offset:
   case nir_intrinsic_load_viewport_z_offset:
   case nir_intrinsic_load_blend_const_color_a_float:
   case nir_intrinsic_load_blend_const_color_b_float:
   case nir_intrinsic_load_blend_const_color_g_float:
   case nir_intrinsic_load_blend_const_color_r_float:
   case nir_intrinsic_load_blend_const_color_rgba:
   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
   case nir_intrinsic_load_line_width:
   case nir_intrinsic_load_aa_line_width:
   case nir_intrinsic_load_xfb_address:
   case nir_intrinsic_load_num_vertices:
   case nir_intrinsic_load_fb_layers_v3d:
   case nir_intrinsic_load_tcs_num_patches_amd:
   case nir_intrinsic_load_ring_tess_factors_amd:
   case nir_intrinsic_load_ring_tess_offchip_amd:
   case nir_intrinsic_load_ring_tess_factors_offset_amd:
   case nir_intrinsic_load_ring_tess_offchip_offset_amd:
   case nir_intrinsic_load_ring_mesh_scratch_amd:
   case nir_intrinsic_load_ring_mesh_scratch_offset_amd:
   case nir_intrinsic_load_ring_esgs_amd:
   case nir_intrinsic_load_ring_es2gs_offset_amd:
   case nir_intrinsic_load_ring_task_draw_amd:
   case nir_intrinsic_load_ring_task_payload_amd:
   case nir_intrinsic_load_task_ring_entry_amd:
   case nir_intrinsic_load_task_ib_addr:
   case nir_intrinsic_load_task_ib_stride:
   case nir_intrinsic_load_sample_positions_pan:
   case nir_intrinsic_load_workgroup_num_input_vertices_amd:
   case nir_intrinsic_load_workgroup_num_input_primitives_amd:
   case nir_intrinsic_load_shader_query_enabled_amd:
   case nir_intrinsic_load_cull_front_face_enabled_amd:
   case nir_intrinsic_load_cull_back_face_enabled_amd:
   case nir_intrinsic_load_cull_ccw_amd:
   case nir_intrinsic_load_cull_small_primitives_enabled_amd:
   case nir_intrinsic_load_cull_any_enabled_amd:
   case nir_intrinsic_load_cull_small_prim_precision_amd:
   case nir_intrinsic_load_user_data_amd:
   case nir_intrinsic_load_force_vrs_rates_amd:
   case nir_intrinsic_load_tess_level_inner_default:
   case nir_intrinsic_load_tess_level_outer_default:
   case nir_intrinsic_load_scalar_arg_amd:
   case nir_intrinsic_load_smem_amd:
   case nir_intrinsic_load_global_const_block_intel:
   case nir_intrinsic_load_reloc_const_intel:
   case nir_intrinsic_load_global_block_intel:
   case nir_intrinsic_load_btd_global_arg_addr_intel:
   case nir_intrinsic_load_btd_local_arg_addr_intel:
   case nir_intrinsic_load_mesh_inline_data_intel:
   case nir_intrinsic_load_ray_num_dss_rt_stacks_intel:
   case nir_intrinsic_load_lshs_vertex_stride_amd:
   case nir_intrinsic_load_hs_out_patch_data_offset_amd:
      is_divergent = false;
      break;

   /* Intrinsics with divergence depending on shader stage and hardware */
   case nir_intrinsic_load_frag_shading_rate:
      is_divergent = !(options & nir_divergence_single_frag_shading_rate_per_subgroup);
      break;
   case nir_intrinsic_load_input:
      is_divergent = instr->src[0].ssa->divergent;
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else if (stage != MESA_SHADER_MESH)
         is_divergent = true;
      break;
   case nir_intrinsic_load_per_vertex_input:
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent;
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_input_vertex:
      is_divergent = instr->src[1].ssa->divergent;
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_output:
      is_divergent = instr->src[0].ssa->divergent;
      switch (stage) {
      case MESA_SHADER_TESS_CTRL:
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
         break;
      case MESA_SHADER_FRAGMENT:
         is_divergent = true;
         break;
      case MESA_SHADER_TASK:
      case MESA_SHADER_MESH:
         /* Divergent if src[0] is, so nothing else to do. */
         break;
      default:
         unreachable("Invalid stage for load_output");
      }
      break;
   case nir_intrinsic_load_per_vertex_output:
      assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH);
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent ||
                     (stage == MESA_SHADER_TESS_CTRL &&
                      !(options & nir_divergence_single_patch_per_tcs_subgroup));
      break;
   case nir_intrinsic_load_per_primitive_output:
      assert(stage == MESA_SHADER_MESH);
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent;
      break;
   case nir_intrinsic_load_layer_id:
   case nir_intrinsic_load_front_face:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_view_index:
      assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
      if (options & nir_divergence_view_index_uniform)
         is_divergent = false;
      else if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_fs_input_interp_deltas:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = instr->src[0].ssa->divergent;
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_primitive_id:
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else if (stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_VERTEX)
         is_divergent = true;
      else
         unreachable("Invalid stage for load_primitive_id");
      break;
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer:
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         unreachable("Invalid stage for load_tess_level_*");
      break;
   case nir_intrinsic_load_patch_vertices_in:
      if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         assert(stage == MESA_SHADER_TESS_CTRL);
      break;

   case nir_intrinsic_load_workgroup_index:
   case nir_intrinsic_load_workgroup_id:
      assert(gl_shader_stage_uses_workgroup(stage));
      if (stage == MESA_SHADER_COMPUTE)
         is_divergent |= (options & nir_divergence_multiple_workgroup_per_compute_subgroup);
      break;

   /* Clustered reductions are uniform if cluster_size == subgroup_size or
    * the source is uniform and the operation is invariant.
    * Inclusive scans are uniform if
    * the source is uniform and the operation is invariant.
    */
   case nir_intrinsic_reduce:
      if (nir_intrinsic_cluster_size(instr) == 0)
         return false;
      FALLTHROUGH;
   case nir_intrinsic_inclusive_scan: {
      nir_op op = nir_intrinsic_reduction_op(instr);
      is_divergent = instr->src[0].ssa->divergent;
      if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
          op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
          op != nir_op_iand && op != nir_op_ior)
         is_divergent = true;
      break;
   }
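
   /* Worked example for the invariance rule above: an inclusive umax scan of
    * a uniform value v yields umax(v), umax(v, v), ... == v in every lane, so
    * the result stays uniform.  An iadd scan of the same v yields v, 2v, 3v,
    * ..., which differs per lane, hence iadd is not in the list above. */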

   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent;
      break;

   case nir_intrinsic_get_ssbo_size:
   case nir_intrinsic_deref_buffer_array_length:
      is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
      break;

   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_sparse_load:
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_bindless_image_sparse_load:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent;
      break;

   /* Intrinsics with divergence depending on sources */
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_byte_permute_amd:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_shared2_amd:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_2x32:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_global_amd:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_sample_pos_from_id:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_load_task_payload:
   case nir_intrinsic_load_buffer_amd:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_vulkan_resource_index:
   case nir_intrinsic_vulkan_resource_reindex:
   case nir_intrinsic_load_vulkan_descriptor:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_read_deref:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd:
   case nir_intrinsic_is_sparse_texels_resident:
   case nir_intrinsic_sparse_residency_code_and:
   case nir_intrinsic_bvh64_intersect_ray_amd:
   case nir_intrinsic_image_deref_load_param_intel:
   case nir_intrinsic_image_load_raw_intel:
   case nir_intrinsic_get_ubo_size:
   case nir_intrinsic_load_ssbo_address:
   case nir_intrinsic_load_desc_set_address_intel: {
      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
      for (unsigned i = 0; i < num_srcs; i++) {
         if (instr->src[i].ssa->divergent) {
            is_divergent = true;
            break;
         }
      }
      break;
   }

   case nir_intrinsic_shuffle:
      is_divergent = instr->src[0].ssa->divergent &&
                     instr->src[1].ssa->divergent;
      break;
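
   /* shuffle is uniform if either source is uniform: a uniform index makes
    * every invocation read the same lane, and a uniform value is the same no
    * matter which lane is read.  Hence the '&&' above instead of the usual
    * '||'. */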

   /* Intrinsics which are always divergent */
   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1:
   case nir_intrinsic_load_param:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_point_coord_maybe_flipped:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_sample:
   case nir_intrinsic_load_barycentric_model:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_vertex:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_line_coord:
   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_pos_or_center:
   case nir_intrinsic_load_vertex_id_zero_base:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_global_invocation_id_zero_base:
   case nir_intrinsic_load_global_invocation_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_is_helper_invocation:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fcomp_swap:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fadd:
   case nir_intrinsic_image_deref_atomic_fmin:
   case nir_intrinsic_image_deref_atomic_fmax:
   case nir_intrinsic_image_deref_atomic_inc_wrap:
   case nir_intrinsic_image_deref_atomic_dec_wrap:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_fadd:
   case nir_intrinsic_image_atomic_fmin:
   case nir_intrinsic_image_atomic_fmax:
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_fadd:
   case nir_intrinsic_bindless_image_atomic_fmin:
   case nir_intrinsic_bindless_image_atomic_fmax:
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
   case nir_intrinsic_shared_atomic_fadd:
   case nir_intrinsic_shared_atomic_fmin:
   case nir_intrinsic_shared_atomic_fmax:
   case nir_intrinsic_shared_atomic_fcomp_swap:
   case nir_intrinsic_task_payload_atomic_add:
   case nir_intrinsic_task_payload_atomic_imin:
   case nir_intrinsic_task_payload_atomic_umin:
   case nir_intrinsic_task_payload_atomic_imax:
   case nir_intrinsic_task_payload_atomic_umax:
   case nir_intrinsic_task_payload_atomic_and:
   case nir_intrinsic_task_payload_atomic_or:
   case nir_intrinsic_task_payload_atomic_xor:
   case nir_intrinsic_task_payload_atomic_exchange:
   case nir_intrinsic_task_payload_atomic_comp_swap:
   case nir_intrinsic_task_payload_atomic_fadd:
   case nir_intrinsic_task_payload_atomic_fmin:
   case nir_intrinsic_task_payload_atomic_fmax:
   case nir_intrinsic_task_payload_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_global_atomic_fmin:
   case nir_intrinsic_global_atomic_fmax:
   case nir_intrinsic_global_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_add_amd:
   case nir_intrinsic_global_atomic_imin_amd:
   case nir_intrinsic_global_atomic_umin_amd:
   case nir_intrinsic_global_atomic_imax_amd:
   case nir_intrinsic_global_atomic_umax_amd:
   case nir_intrinsic_global_atomic_and_amd:
   case nir_intrinsic_global_atomic_or_amd:
   case nir_intrinsic_global_atomic_xor_amd:
   case nir_intrinsic_global_atomic_exchange_amd:
   case nir_intrinsic_global_atomic_comp_swap_amd:
   case nir_intrinsic_global_atomic_fadd_amd:
   case nir_intrinsic_global_atomic_fmin_amd:
   case nir_intrinsic_global_atomic_fmax_amd:
   case nir_intrinsic_global_atomic_fcomp_swap_amd:
   case nir_intrinsic_global_atomic_add_2x32:
   case nir_intrinsic_global_atomic_imin_2x32:
   case nir_intrinsic_global_atomic_umin_2x32:
   case nir_intrinsic_global_atomic_imax_2x32:
   case nir_intrinsic_global_atomic_umax_2x32:
   case nir_intrinsic_global_atomic_and_2x32:
   case nir_intrinsic_global_atomic_or_2x32:
   case nir_intrinsic_global_atomic_xor_2x32:
   case nir_intrinsic_global_atomic_exchange_2x32:
   case nir_intrinsic_global_atomic_comp_swap_2x32:
   case nir_intrinsic_global_atomic_fadd_2x32:
   case nir_intrinsic_global_atomic_fmin_2x32:
   case nir_intrinsic_global_atomic_fmax_2x32:
   case nir_intrinsic_global_atomic_fcomp_swap_2x32:
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_add_deref:
   case nir_intrinsic_atomic_counter_min_deref:
   case nir_intrinsic_atomic_counter_max_deref:
   case nir_intrinsic_atomic_counter_and_deref:
   case nir_intrinsic_atomic_counter_or_deref:
   case nir_intrinsic_atomic_counter_xor_deref:
   case nir_intrinsic_atomic_counter_inc_deref:
   case nir_intrinsic_atomic_counter_pre_dec_deref:
   case nir_intrinsic_atomic_counter_post_dec_deref:
   case nir_intrinsic_atomic_counter_exchange_deref:
   case nir_intrinsic_atomic_counter_comp_swap_deref:
   case nir_intrinsic_exclusive_scan:
   case nir_intrinsic_ballot_bit_count_exclusive:
   case nir_intrinsic_ballot_bit_count_inclusive:
   case nir_intrinsic_write_invocation_amd:
   case nir_intrinsic_mbcnt_amd:
   case nir_intrinsic_lane_permute_16_amd:
   case nir_intrinsic_elect:
   case nir_intrinsic_load_tlb_color_v3d:
   case nir_intrinsic_load_tess_rel_patch_id_amd:
   case nir_intrinsic_load_gs_vertex_offset_amd:
   case nir_intrinsic_has_input_vertex_amd:
   case nir_intrinsic_has_input_primitive_amd:
   case nir_intrinsic_load_packed_passthrough_primitive_amd:
   case nir_intrinsic_load_initial_edgeflags_amd:
   case nir_intrinsic_gds_atomic_add_amd:
   case nir_intrinsic_load_rt_arg_scratch_offset_amd:
   case nir_intrinsic_load_intersection_opaque_amd:
   case nir_intrinsic_load_vector_arg_amd:
   case nir_intrinsic_load_btd_stack_id_intel:
   case nir_intrinsic_load_topology_id_intel:
   case nir_intrinsic_load_scratch_base_ptr:
      is_divergent = true;
      break;

   default:
#ifdef NDEBUG
      is_divergent = true;
      break;
#else
      nir_print_instr(&instr->instr, stderr);
      unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
#endif
   }

   instr->dest.ssa.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_tex(nir_tex_instr *instr)
{
   if (instr->dest.ssa.divergent)
      return false;

   bool is_divergent = false;

   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_sampler_deref:
      case nir_tex_src_sampler_handle:
      case nir_tex_src_sampler_offset:
         is_divergent |= instr->src[i].src.ssa->divergent &&
                         instr->sampler_non_uniform;
         break;
      case nir_tex_src_texture_deref:
      case nir_tex_src_texture_handle:
      case nir_tex_src_texture_offset:
         is_divergent |= instr->src[i].src.ssa->divergent &&
                         instr->texture_non_uniform;
         break;
      default:
         is_divergent |= instr->src[i].src.ssa->divergent;
         break;
      }
   }

   instr->dest.ssa.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_load_const(nir_load_const_instr *instr)
{
   return false;
}

static bool
visit_ssa_undef(nir_ssa_undef_instr *instr)
{
   return false;
}

static bool
nir_variable_mode_is_uniform(nir_variable_mode mode) {
   switch (mode) {
   case nir_var_uniform:
   case nir_var_mem_ubo:
   case nir_var_mem_ssbo:
   case nir_var_mem_shared:
   case nir_var_mem_task_payload:
   case nir_var_mem_global:
   case nir_var_image:
      return true;
   default:
      return false;
   }
}

static bool
nir_variable_is_uniform(nir_shader *shader, nir_variable *var)
{
   if (nir_variable_mode_is_uniform(var->data.mode))
      return true;

   nir_divergence_options options = shader->options->divergence_analysis_options;
   gl_shader_stage stage = shader->info.stage;

   if (stage == MESA_SHADER_FRAGMENT &&
       (options & nir_divergence_single_prim_per_subgroup) &&
       var->data.mode == nir_var_shader_in &&
       var->data.interpolation == INTERP_MODE_FLAT)
      return true;

   if (stage == MESA_SHADER_TESS_CTRL &&
       (options & nir_divergence_single_patch_per_tcs_subgroup) &&
       var->data.mode == nir_var_shader_out && var->data.patch)
      return true;

   if (stage == MESA_SHADER_TESS_EVAL &&
       (options & nir_divergence_single_patch_per_tes_subgroup) &&
       var->data.mode == nir_var_shader_in && var->data.patch)
      return true;

   return false;
}

static bool
visit_deref(nir_shader *shader, nir_deref_instr *deref)
{
   if (deref->dest.ssa.divergent)
      return false;

   bool is_divergent = false;
   switch (deref->deref_type) {
   case nir_deref_type_var:
      is_divergent = !nir_variable_is_uniform(shader, deref->var);
      break;
   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      is_divergent = deref->arr.index.ssa->divergent;
      FALLTHROUGH;
   case nir_deref_type_struct:
   case nir_deref_type_array_wildcard:
      is_divergent |= deref->parent.ssa->divergent;
      break;
   case nir_deref_type_cast:
      is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) ||
                     deref->parent.ssa->divergent;
      break;
   }

   deref->dest.ssa.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_jump(nir_jump_instr *jump, struct divergence_state *state)
{
   switch (jump->type) {
   case nir_jump_continue:
      if (state->divergent_loop_continue)
         return false;
      if (state->divergent_loop_cf)
         state->divergent_loop_continue = true;
      return state->divergent_loop_continue;
   case nir_jump_break:
      if (state->divergent_loop_break)
         return false;
      if (state->divergent_loop_cf)
         state->divergent_loop_break = true;
      return state->divergent_loop_break;
   case nir_jump_halt:
      /* This totally kills invocations so it doesn't add divergence */
      break;
   case nir_jump_return:
      unreachable("NIR divergence analysis: Unsupported return instruction.");
      break;
   case nir_jump_goto:
   case nir_jump_goto_if:
      unreachable("NIR divergence analysis: Unsupported goto_if instruction.");
      break;
   }
   return false;
}

static bool
set_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
{
   def->divergent = false;
   return true;
}

static bool
update_instr_divergence(nir_shader *shader, nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return visit_alu(nir_instr_as_alu(instr));
   case nir_instr_type_intrinsic:
      return visit_intrinsic(shader, nir_instr_as_intrinsic(instr));
   case nir_instr_type_tex:
      return visit_tex(nir_instr_as_tex(instr));
   case nir_instr_type_load_const:
      return visit_load_const(nir_instr_as_load_const(instr));
   case nir_instr_type_ssa_undef:
      return visit_ssa_undef(nir_instr_as_ssa_undef(instr));
   case nir_instr_type_deref:
      return visit_deref(shader, nir_instr_as_deref(instr));
   case nir_instr_type_jump:
   case nir_instr_type_phi:
   case nir_instr_type_call:
   case nir_instr_type_parallel_copy:
   default:
      unreachable("NIR divergence analysis: Unsupported instruction type.");
   }
}

static bool
visit_block(nir_block *block, struct divergence_state *state)
{
   bool has_changed = false;

   nir_foreach_instr(instr, block) {
      /* phis are handled when processing the branches */
      if (instr->type == nir_instr_type_phi)
         continue;

      if (state->first_visit)
         nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);

      if (instr->type == nir_instr_type_jump)
         has_changed |= visit_jump(nir_instr_as_jump(instr), state);
      else
         has_changed |= update_instr_divergence(state->shader, instr);
   }

   return has_changed;
}

/* There are 3 types of phi instructions:
 * (1) gamma: represents the joining point of different paths
 *     created by an “if-then-else” branch.
 *     The resulting value is divergent if the branch condition
 *     or any of the source values is divergent. */
static bool
visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
{
   if (phi->dest.ssa.divergent)
      return false;

   unsigned defined_srcs = 0;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
      if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) {
         defined_srcs++;
      }
   }

   /* if the condition is divergent and at least two sources are defined,
    * the definition is divergent */
   if (defined_srcs > 1 && if_cond_divergent) {
      phi->dest.ssa.divergent = true;
      return true;
   }

   return false;
}
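
/* Example: with a divergent condition,
 *
 *    if (cond) { x1 = ...; } else { x2 = ...; }
 *    x = phi(x1, x2)
 *
 * x is divergent even if x1 and x2 are both uniform, because invocations
 * that took different paths observe different definitions. */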

/* There are 3 types of phi instructions:
 * (2) mu: only exists at loop headers and
 *     merges initial and loop-carried values.
 *     The resulting value is divergent if any source value
 *     is divergent or a divergent loop continue condition
 *     is associated with a different ssa-def. */
static bool
visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
{
   if (phi->dest.ssa.divergent)
      return false;

   nir_ssa_def *same = NULL;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
      /* if this loop is uniform, we're done here */
      if (!divergent_continue)
         continue;
      /* skip the loop preheader */
      if (src->pred == preheader)
         continue;
      /* skip undef values */
      if (nir_src_is_undef(src->src))
         continue;

      /* check if all loop-carried values are from the same ssa-def */
      if (!same)
         same = src->src.ssa;
      else if (same != src->src.ssa) {
         phi->dest.ssa.divergent = true;
         return true;
      }
   }

   return false;
}
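
/* Example: with a divergent continue, lanes leave an iteration at different
 * points, so a loop-carried value like
 *
 *    loop {
 *       x = phi(preheader: a, continue_block: x, end_block: x_next)
 *       if (divergent_cond) continue;
 *       x_next = ...;
 *    }
 *
 * becomes divergent: the loop-carried sources x and x_next are different
 * ssa-defs, and which one a lane contributes depends on the divergent
 * condition. */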

/* There are 3 types of phi instructions:
 * (3) eta: represents values that leave a loop.
 *     The resulting value is divergent if the source value is divergent
 *     or any loop exit condition is divergent for a value which is
 *     not loop-invariant.
 *     (note: there should be no phi for loop-invariant variables.) */
static bool
visit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
{
   if (phi->dest.ssa.divergent)
      return false;

   if (divergent_break) {
      phi->dest.ssa.divergent = true;
      return true;
   }

   /* if any source value is divergent, the resulting value is divergent */
   nir_foreach_phi_src(src, phi) {
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
   }

   return false;
}
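
/* Example: with a divergent break,
 *
 *    loop {
 *       if (divergent_cond) break;
 *       i_next = iadd i, 1
 *    }
 *    x = phi(i_next)   <- LCSSA/eta phi after the loop
 *
 * lanes exit on different iterations and therefore with different values,
 * so x is divergent even though it has a single source. */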

static bool
visit_if(nir_if *if_stmt, struct divergence_state *state)
{
   bool progress = false;

   struct divergence_state then_state = *state;
   then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->then_list, &then_state);

   struct divergence_state else_state = *state;
   else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->else_list, &else_state);

   /* handle phis after the IF */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      if (state->first_visit)
         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
      progress |= visit_if_merge_phi(nir_instr_as_phi(instr),
                                     if_stmt->condition.ssa->divergent);
   }

   /* join loop divergence information from both branch legs */
   state->divergent_loop_continue |= then_state.divergent_loop_continue ||
                                     else_state.divergent_loop_continue;
   state->divergent_loop_break |= then_state.divergent_loop_break ||
                                  else_state.divergent_loop_break;

   /* A divergent continue makes succeeding loop CF divergent:
    * not all loop-active invocations participate in the remaining loop body,
    * which means that a following break might be taken only by some
    * invocations. */
   state->divergent_loop_cf |= state->divergent_loop_continue;

   return progress;
}

static bool
visit_loop(nir_loop *loop, struct divergence_state *state)
{
   bool progress = false;
   nir_block *loop_header = nir_loop_first_block(loop);
   nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);

   /* handle loop header phis first: we have no knowledge yet about
    * the loop's control flow or any loop-carried sources. */
   nir_foreach_instr(instr, loop_header) {
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      if (!state->first_visit && phi->dest.ssa.divergent)
         continue;

      nir_foreach_phi_src(src, phi) {
         if (src->pred == loop_preheader) {
            phi->dest.ssa.divergent = src->src.ssa->divergent;
            break;
         }
      }
      progress |= phi->dest.ssa.divergent;
   }

   /* setup loop state */
   struct divergence_state loop_state = *state;
   loop_state.divergent_loop_cf = false;
   loop_state.divergent_loop_continue = false;
   loop_state.divergent_loop_break = false;

   /* process loop body until no further changes are made */
   bool repeat;
   do {
      progress |= visit_cf_list(&loop->body, &loop_state);
      repeat = false;

      /* revisit loop header phis to see if something has changed */
      nir_foreach_instr(instr, loop_header) {
         if (instr->type != nir_instr_type_phi)
            break;

         repeat |= visit_loop_header_phi(nir_instr_as_phi(instr),
                                         loop_preheader,
                                         loop_state.divergent_loop_continue);
      }

      loop_state.divergent_loop_cf = false;
      loop_state.first_visit = false;
   } while (repeat);

   /* handle phis after the loop */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      if (state->first_visit)
         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
      progress |= visit_loop_exit_phi(nir_instr_as_phi(instr),
                                      loop_state.divergent_loop_break);
   }

   loop->divergent = (loop_state.divergent_loop_break || loop_state.divergent_loop_continue);

   return progress;
}

static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state)
{
   bool has_changed = false;

   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         has_changed |= visit_block(nir_cf_node_as_block(node), state);
         break;
      case nir_cf_node_if:
         has_changed |= visit_if(nir_cf_node_as_if(node), state);
         break;
      case nir_cf_node_loop:
         has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
         break;
      case nir_cf_node_function:
         unreachable("NIR divergence analysis: Unsupported cf_node type.");
      }
   }

   return has_changed;
}

void
nir_divergence_analysis(nir_shader *shader)
{
   shader->info.divergence_analysis_run = true;

   struct divergence_state state = {
      .stage = shader->info.stage,
      .shader = shader,
      .divergent_loop_cf = false,
      .divergent_loop_continue = false,
      .divergent_loop_break = false,
      .first_visit = true,
   };

   visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
}
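
/* Typical usage (a sketch; the exact pass ordering and the arguments to
 * nir_convert_to_lcssa are assumptions, but LCSSA form is a documented
 * precondition of this pass):
 *
 *    NIR_PASS_V(shader, nir_convert_to_lcssa, true, true);
 *    nir_divergence_analysis(shader);
 *    ...
 *    if (nir_src_is_divergent(alu->src[0].src))
 *       ...lower to per-lane code...
 */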

bool
nir_update_instr_divergence(nir_shader *shader, nir_instr *instr)
{
   nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);

   if (instr->type == nir_instr_type_phi) {
      nir_cf_node *prev = nir_cf_node_prev(&instr->block->cf_node);
      /* can only update gamma/if phis */
      if (!prev || prev->type != nir_cf_node_if)
         return false;

      nir_if *nif = nir_cf_node_as_if(prev);

      visit_if_merge_phi(nir_instr_as_phi(instr), nir_src_is_divergent(nif->condition));
      return true;
   }

   update_instr_divergence(shader, instr);
   return true;
}
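
/* Sketch of incremental use (the builder variable and operands are
 * hypothetical): after inserting a new instruction into a shader whose
 * divergence information is otherwise up to date, refresh just that
 * instruction instead of re-running the whole analysis:
 *
 *    nir_ssa_def *sum = nir_fadd(&b, x, y);
 *    nir_update_instr_divergence(shader, sum->parent_instr);
 */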
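/* Note: only the top-level CF nodes of the entrypoint are inspected here, so
 * a divergent loop nested inside another loop or if is not reported. */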
bool
nir_has_divergent_loop(nir_shader *shader)
{
   bool divergent_loop = false;
   nir_function_impl *func = nir_shader_get_entrypoint(shader);

   foreach_list_typed(nir_cf_node, node, node, &func->body) {
      if (node->type == nir_cf_node_loop && nir_cf_node_as_loop(node)->divergent) {
         divergent_loop = true;
         break;
      }
   }

   return divergent_loop;
}