/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "nir.h"

/* This pass computes, for each SSA definition, whether it is uniform,
 * i.e. whether it has the same value for all invocations of the group.
 *
 * This divergence analysis pass expects the shader to be in LCSSA form.
 *
 * The algorithm implements "The Simple Divergence Analysis" from
 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
 * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS),
 * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
 */

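/* A small illustrative sketch (GLSL-level pseudocode, not part of the pass):
 *
 *    uint a = gl_WorkGroupID.x;         // uniform: same for the whole workgroup
 *                                       // (barring the multiple-workgroups-per-
 *                                       // subgroup option below)
 *    uint b = gl_LocalInvocationIndex;  // divergent: differs per invocation
 *    uint c = a * 2u;                   // uniform: all sources are uniform
 *    uint d = b * 2u;                   // divergent: one source is divergent
 *
 * Divergence propagates forward from sources to definitions; divergent
 * control flow additionally taints the phis that merge its values.
 */
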
struct divergence_state {
   const gl_shader_stage stage;
   nir_shader *shader;

   /* Whether the caller requested vertex divergence (meaning between vertices
    * of the same primitive) instead of subgroup invocation divergence
    * (between invocations of the same subgroup). For example, patch input
    * loads are always convergent, while subgroup intrinsics are divergent
    * because vertices of the same primitive can be processed by different
    * subgroups.
    */
   bool vertex_divergence;

   /** current control flow state */
   /* True if some loop-active invocations might take a different control-flow path.
    * A divergent break does not cause subsequent control-flow to be considered
    * divergent because those invocations are no longer active in the loop.
    * For a divergent if, both sides are considered divergent flow because
    * the other side is still loop-active. */
   bool divergent_loop_cf;
   /* True if a divergent continue happened since the loop header */
   bool divergent_loop_continue;
   /* True if a divergent break happened since the loop header */
   bool divergent_loop_break;

   /* True if we visit the block for the first time */
   bool first_visit;
};

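/* Illustrative loop sketch for the flags above (pseudocode, hypothetical):
 *
 *    loop {
 *       if (divergent_a) break;     // sets divergent_loop_break; invocations
 *                                   // that left are no longer loop-active, so
 *                                   // the flow after this is NOT marked divergent
 *       if (divergent_b) continue;  // sets divergent_loop_continue; those
 *                                   // invocations stay loop-active, so
 *                                   // divergent_loop_cf is set from here on
 *    }
 *
 * See visit_jump() and visit_if() below for how these flags are maintained.
 */
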
static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state);

static bool
visit_alu(nir_alu_instr *instr)
{
   if (instr->def.divergent)
      return false;

   unsigned num_src = nir_op_infos[instr->op].num_inputs;

   for (unsigned i = 0; i < num_src; i++) {
      if (instr->src[i].src.ssa->divergent) {
         instr->def.divergent = true;
         return true;
      }
   }

   return false;
}

static bool
visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr,
                bool vertex_divergence)
{
   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
      return false;

   if (instr->def.divergent)
      return false;

   nir_divergence_options options = shader->options->divergence_analysis_options;
   gl_shader_stage stage = shader->info.stage;
   bool is_divergent = false;
   switch (instr->intrinsic) {
   case nir_intrinsic_shader_clock:
   case nir_intrinsic_ballot:
   case nir_intrinsic_ballot_relaxed:
   case nir_intrinsic_as_uniform:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_first_invocation:
   case nir_intrinsic_last_invocation:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_load_subgroup_id:
      /* VS/TES/GS invocations of the same primitive can be in different
       * subgroups, so subgroup ops are always divergent between vertices of
       * the same primitive.
       */
      is_divergent = vertex_divergence;
      break;

   /* Intrinsics which are always uniform */
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_num_subgroups:
   case nir_intrinsic_load_ray_launch_size:
   case nir_intrinsic_load_sbt_base_amd:
   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_viewport_scale:
   case nir_intrinsic_load_user_clip_plane:
   case nir_intrinsic_load_viewport_x_scale:
   case nir_intrinsic_load_viewport_y_scale:
   case nir_intrinsic_load_viewport_z_scale:
   case nir_intrinsic_load_viewport_offset:
   case nir_intrinsic_load_viewport_x_offset:
   case nir_intrinsic_load_viewport_y_offset:
   case nir_intrinsic_load_viewport_z_offset:
   case nir_intrinsic_load_viewport_xy_scale_and_offset:
   case nir_intrinsic_load_blend_const_color_a_float:
   case nir_intrinsic_load_blend_const_color_b_float:
   case nir_intrinsic_load_blend_const_color_g_float:
   case nir_intrinsic_load_blend_const_color_r_float:
   case nir_intrinsic_load_blend_const_color_rgba:
   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
   case nir_intrinsic_load_line_width:
   case nir_intrinsic_load_aa_line_width:
   case nir_intrinsic_load_xfb_address:
   case nir_intrinsic_load_num_vertices:
   case nir_intrinsic_load_fb_layers_v3d:
   case nir_intrinsic_load_tcs_num_patches_amd:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_ring_tess_factors_amd:
   case nir_intrinsic_load_ring_tess_offchip_amd:
   case nir_intrinsic_load_ring_tess_factors_offset_amd:
   case nir_intrinsic_load_ring_tess_offchip_offset_amd:
   case nir_intrinsic_load_ring_mesh_scratch_amd:
   case nir_intrinsic_load_ring_mesh_scratch_offset_amd:
   case nir_intrinsic_load_ring_esgs_amd:
   case nir_intrinsic_load_ring_es2gs_offset_amd:
   case nir_intrinsic_load_ring_task_draw_amd:
   case nir_intrinsic_load_ring_task_payload_amd:
   case nir_intrinsic_load_sample_positions_amd:
   case nir_intrinsic_load_rasterization_samples_amd:
   case nir_intrinsic_load_ring_gsvs_amd:
   case nir_intrinsic_load_ring_gs2vs_offset_amd:
   case nir_intrinsic_load_streamout_config_amd:
   case nir_intrinsic_load_streamout_write_index_amd:
   case nir_intrinsic_load_streamout_offset_amd:
   case nir_intrinsic_load_task_ring_entry_amd:
   case nir_intrinsic_load_ring_attr_amd:
   case nir_intrinsic_load_ring_attr_offset_amd:
   case nir_intrinsic_load_provoking_vtx_amd:
   case nir_intrinsic_load_sample_positions_pan:
   case nir_intrinsic_load_workgroup_num_input_vertices_amd:
   case nir_intrinsic_load_workgroup_num_input_primitives_amd:
   case nir_intrinsic_load_pipeline_stat_query_enabled_amd:
   case nir_intrinsic_load_prim_gen_query_enabled_amd:
   case nir_intrinsic_load_prim_xfb_query_enabled_amd:
   case nir_intrinsic_load_merged_wave_info_amd:
   case nir_intrinsic_load_clamp_vertex_color_amd:
   case nir_intrinsic_load_cull_front_face_enabled_amd:
   case nir_intrinsic_load_cull_back_face_enabled_amd:
   case nir_intrinsic_load_cull_ccw_amd:
   case nir_intrinsic_load_cull_small_primitives_enabled_amd:
   case nir_intrinsic_load_cull_any_enabled_amd:
   case nir_intrinsic_load_cull_small_prim_precision_amd:
   case nir_intrinsic_load_user_data_amd:
   case nir_intrinsic_load_force_vrs_rates_amd:
   case nir_intrinsic_load_tess_level_inner_default:
   case nir_intrinsic_load_tess_level_outer_default:
   case nir_intrinsic_load_scalar_arg_amd:
   case nir_intrinsic_load_smem_amd:
   case nir_intrinsic_load_rt_dynamic_callable_stack_base_amd:
   case nir_intrinsic_load_resume_shader_address_amd:
   case nir_intrinsic_load_global_const_block_intel:
   case nir_intrinsic_load_reloc_const_intel:
   case nir_intrinsic_load_btd_global_arg_addr_intel:
   case nir_intrinsic_load_btd_local_arg_addr_intel:
   case nir_intrinsic_load_mesh_inline_data_intel:
   case nir_intrinsic_load_ray_num_dss_rt_stacks_intel:
   case nir_intrinsic_load_lshs_vertex_stride_amd:
   case nir_intrinsic_load_esgs_vertex_stride_amd:
   case nir_intrinsic_load_hs_out_patch_data_offset_amd:
   case nir_intrinsic_load_clip_half_line_width_amd:
   case nir_intrinsic_load_num_vertices_per_primitive_amd:
   case nir_intrinsic_load_streamout_buffer_amd:
   case nir_intrinsic_load_ordered_id_amd:
   case nir_intrinsic_load_gs_wave_id_amd:
   case nir_intrinsic_load_provoking_vtx_in_prim_amd:
   case nir_intrinsic_load_lds_ngg_scratch_base_amd:
   case nir_intrinsic_load_lds_ngg_gs_out_vertex_base_amd:
   case nir_intrinsic_load_btd_shader_type_intel:
   case nir_intrinsic_load_base_workgroup_id:
   case nir_intrinsic_load_alpha_reference_amd:
   case nir_intrinsic_load_ubo_uniform_block_intel:
   case nir_intrinsic_load_ssbo_uniform_block_intel:
   case nir_intrinsic_load_shared_uniform_block_intel:
   case nir_intrinsic_load_barycentric_optimize_amd:
   case nir_intrinsic_load_poly_line_smooth_enabled:
   case nir_intrinsic_load_rasterization_primitive_amd:
   case nir_intrinsic_load_global_constant_uniform_block_intel:
   case nir_intrinsic_cmat_length:
      is_divergent = false;
      break;

   /* This is divergent because it specifically loads sequential values into
    * successive SIMD lanes.
    */
   case nir_intrinsic_load_global_block_intel:
      is_divergent = true;
      break;

   case nir_intrinsic_decl_reg:
      is_divergent = nir_intrinsic_divergent(instr);
      break;

   /* Intrinsics with divergence depending on shader stage and hardware */
   case nir_intrinsic_load_shader_record_ptr:
      is_divergent = !(options & nir_divergence_shader_record_ptr_uniform);
      break;
   case nir_intrinsic_load_frag_shading_rate:
      is_divergent = !(options & nir_divergence_single_frag_shading_rate_per_subgroup);
      break;
   case nir_intrinsic_load_input:
      is_divergent = instr->src[0].ssa->divergent;

      if (stage == MESA_SHADER_FRAGMENT) {
         is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      } else if (stage == MESA_SHADER_TESS_EVAL) {
         /* Patch input loads are uniform between vertices of the same primitive. */
         if (vertex_divergence)
            is_divergent = false;
         else
            is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      } else {
         is_divergent = true;
      }
      break;
   case nir_intrinsic_load_per_vertex_input:
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent;
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_input_vertex:
      is_divergent = instr->src[1].ssa->divergent;
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_output:
      is_divergent = instr->src[0].ssa->divergent;
      switch (stage) {
      case MESA_SHADER_TESS_CTRL:
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
         break;
      case MESA_SHADER_FRAGMENT:
         is_divergent = true;
         break;
      case MESA_SHADER_TASK:
      case MESA_SHADER_MESH:
         /* NV_mesh_shader only (EXT_mesh_shader does not allow loading outputs).
          * Divergent if src[0] is, so nothing else to do.
          */
         break;
      default:
         unreachable("Invalid stage for load_output");
      }
      break;
   case nir_intrinsic_load_per_vertex_output:
      /* TCS and NV_mesh_shader only (EXT_mesh_shader does not allow loading outputs). */
      assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH);
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent ||
                     (stage == MESA_SHADER_TESS_CTRL &&
                      !(options & nir_divergence_single_patch_per_tcs_subgroup));
      break;
   case nir_intrinsic_load_per_primitive_output:
      /* NV_mesh_shader only (EXT_mesh_shader does not allow loading outputs). */
      assert(stage == MESA_SHADER_MESH);
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent;
      break;
   case nir_intrinsic_load_layer_id:
   case nir_intrinsic_load_front_face:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_view_index:
      assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
      if (options & nir_divergence_view_index_uniform)
         is_divergent = false;
      else if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_fs_input_interp_deltas:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = instr->src[0].ssa->divergent;
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_primitive_id:
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else if (stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_VERTEX)
         is_divergent = true;
      else if (stage == MESA_SHADER_ANY_HIT ||
               stage == MESA_SHADER_CLOSEST_HIT ||
               stage == MESA_SHADER_INTERSECTION)
         is_divergent = true;
      else
         unreachable("Invalid stage for load_primitive_id");
      break;
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer:
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         unreachable("Invalid stage for load_primitive_tess_level_*");
      break;

   case nir_intrinsic_load_workgroup_index:
   case nir_intrinsic_load_workgroup_id:
   case nir_intrinsic_load_workgroup_id_zero_base:
      assert(gl_shader_stage_uses_workgroup(stage));
      if (stage == MESA_SHADER_COMPUTE)
         is_divergent |= (options & nir_divergence_multiple_workgroup_per_compute_subgroup);
      break;

   /* Clustered reductions are uniform if cluster_size == subgroup_size or
    * if the source is uniform and the operation is invariant.
    * Inclusive scans are uniform only if the source is uniform and the
    * operation is invariant.
    */
   case nir_intrinsic_reduce:
      if (nir_intrinsic_cluster_size(instr) == 0) {
         /* Cluster size of 0 means the subgroup size.
          * This is uniform within a subgroup, but divergent between
          * vertices of the same primitive because they may be in
          * different subgroups.
          */
         is_divergent = vertex_divergence;
         break;
      }
      FALLTHROUGH;
   case nir_intrinsic_inclusive_scan: {
      nir_op op = nir_intrinsic_reduction_op(instr);
      is_divergent = instr->src[0].ssa->divergent || vertex_divergence;
      if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
          op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
          op != nir_op_iand && op != nir_op_ior)
         is_divergent = true;
      break;
   }

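   /* Worked example for the scan rule above (illustrative): with a uniform
    * source x, an inclusive scan with iadd yields (i + 1) * x in lane i,
    * which differs per lane, while umin yields min(x, ..., x) = x in every
    * lane. Hence only the idempotent min/max/and/or ops keep a uniform
    * source uniform.
    */
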
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent;
      break;

   case nir_intrinsic_get_ssbo_size:
   case nir_intrinsic_deref_buffer_array_length:
      is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
      break;

   case nir_intrinsic_image_samples_identical:
   case nir_intrinsic_image_deref_samples_identical:
   case nir_intrinsic_bindless_image_samples_identical:
   case nir_intrinsic_image_fragment_mask_load_amd:
   case nir_intrinsic_image_deref_fragment_mask_load_amd:
   case nir_intrinsic_bindless_image_fragment_mask_load_amd:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent;
      break;

   case nir_intrinsic_image_texel_address:
   case nir_intrinsic_image_deref_texel_address:
   case nir_intrinsic_bindless_image_texel_address:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent || instr->src[2].ssa->divergent;
      break;

   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_sparse_load:
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_bindless_image_sparse_load:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent;
      break;

   case nir_intrinsic_optimization_barrier_vgpr_amd:
      is_divergent = instr->src[0].ssa->divergent;
      break;

   /* Intrinsics with divergence depending on sources */
   case nir_intrinsic_convert_alu_types:
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_rotate:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_quad_vote_any:
   case nir_intrinsic_quad_vote_all:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_shared2_amd:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_2x32:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_global_amd:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_sample_pos_from_id:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_load_task_payload:
   case nir_intrinsic_load_buffer_amd:
   case nir_intrinsic_load_typed_buffer_amd:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_image_descriptor_amd:
   case nir_intrinsic_image_deref_descriptor_amd:
   case nir_intrinsic_bindless_image_descriptor_amd:
   case nir_intrinsic_strict_wqm_coord_amd:
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_vulkan_resource_index:
   case nir_intrinsic_vulkan_resource_reindex:
   case nir_intrinsic_load_vulkan_descriptor:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_read_deref:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd:
   case nir_intrinsic_is_sparse_texels_resident:
   case nir_intrinsic_sparse_residency_code_and:
   case nir_intrinsic_bvh64_intersect_ray_amd:
   case nir_intrinsic_image_deref_load_param_intel:
   case nir_intrinsic_image_load_raw_intel:
   case nir_intrinsic_get_ubo_size:
   case nir_intrinsic_load_ssbo_address:
   case nir_intrinsic_load_desc_set_address_intel:
   case nir_intrinsic_load_desc_set_dynamic_index_intel:
   case nir_intrinsic_load_global_constant_bounded:
   case nir_intrinsic_load_global_constant_offset:
   case nir_intrinsic_resource_intel:
   case nir_intrinsic_load_reg:
   case nir_intrinsic_load_reg_indirect: {
      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
      for (unsigned i = 0; i < num_srcs; i++) {
         if (instr->src[i].ssa->divergent) {
            is_divergent = true;
            break;
         }
      }
      break;
   }

   case nir_intrinsic_shuffle:
      is_divergent = instr->src[0].ssa->divergent &&
                     instr->src[1].ssa->divergent;
      break;

   /* Intrinsics which are always divergent */
   case nir_intrinsic_inverse_ballot:
   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1:
   case nir_intrinsic_load_param:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_point_coord_maybe_flipped:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_sample:
   case nir_intrinsic_load_barycentric_model:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_load_barycentric_at_offset_nv:
   case nir_intrinsic_load_barycentric_coord_pixel:
   case nir_intrinsic_load_barycentric_coord_centroid:
   case nir_intrinsic_load_barycentric_coord_sample:
   case nir_intrinsic_load_barycentric_coord_at_sample:
   case nir_intrinsic_load_barycentric_coord_at_offset:
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_vertex:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_tess_coord_xy:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_line_coord:
   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_frag_coord_zw:
   case nir_intrinsic_load_pixel_coord:
   case nir_intrinsic_load_fully_covered:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_pos_or_center:
   case nir_intrinsic_load_vertex_id_zero_base:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_global_invocation_id_zero_base:
   case nir_intrinsic_load_global_invocation_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_is_helper_invocation:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_deref_atomic:
   case nir_intrinsic_deref_atomic_swap:
   case nir_intrinsic_ssbo_atomic:
   case nir_intrinsic_ssbo_atomic_swap:
   case nir_intrinsic_image_deref_atomic:
   case nir_intrinsic_image_deref_atomic_swap:
   case nir_intrinsic_image_atomic:
   case nir_intrinsic_image_atomic_swap:
   case nir_intrinsic_bindless_image_atomic:
   case nir_intrinsic_bindless_image_atomic_swap:
   case nir_intrinsic_shared_atomic:
   case nir_intrinsic_shared_atomic_swap:
   case nir_intrinsic_task_payload_atomic:
   case nir_intrinsic_task_payload_atomic_swap:
   case nir_intrinsic_global_atomic:
   case nir_intrinsic_global_atomic_swap:
   case nir_intrinsic_global_atomic_amd:
   case nir_intrinsic_global_atomic_swap_amd:
   case nir_intrinsic_global_atomic_2x32:
   case nir_intrinsic_global_atomic_swap_2x32:
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_add_deref:
   case nir_intrinsic_atomic_counter_min_deref:
   case nir_intrinsic_atomic_counter_max_deref:
   case nir_intrinsic_atomic_counter_and_deref:
   case nir_intrinsic_atomic_counter_or_deref:
   case nir_intrinsic_atomic_counter_xor_deref:
   case nir_intrinsic_atomic_counter_inc_deref:
   case nir_intrinsic_atomic_counter_pre_dec_deref:
   case nir_intrinsic_atomic_counter_post_dec_deref:
   case nir_intrinsic_atomic_counter_exchange_deref:
   case nir_intrinsic_atomic_counter_comp_swap_deref:
   case nir_intrinsic_exclusive_scan:
   case nir_intrinsic_ballot_bit_count_exclusive:
   case nir_intrinsic_ballot_bit_count_inclusive:
   case nir_intrinsic_write_invocation_amd:
   case nir_intrinsic_mbcnt_amd:
   case nir_intrinsic_lane_permute_16_amd:
   case nir_intrinsic_elect:
   case nir_intrinsic_load_tlb_color_v3d:
   case nir_intrinsic_load_tess_rel_patch_id_amd:
   case nir_intrinsic_load_gs_vertex_offset_amd:
   case nir_intrinsic_is_subgroup_invocation_lt_amd:
   case nir_intrinsic_load_packed_passthrough_primitive_amd:
   case nir_intrinsic_load_initial_edgeflags_amd:
   case nir_intrinsic_gds_atomic_add_amd:
   case nir_intrinsic_load_rt_arg_scratch_offset_amd:
   case nir_intrinsic_load_intersection_opaque_amd:
   case nir_intrinsic_load_vector_arg_amd:
   case nir_intrinsic_load_btd_stack_id_intel:
   case nir_intrinsic_load_topology_id_intel:
   case nir_intrinsic_load_scratch_base_ptr:
   case nir_intrinsic_ordered_xfb_counter_add_amd:
   case nir_intrinsic_xfb_counter_sub_amd:
   case nir_intrinsic_load_stack:
   case nir_intrinsic_load_ray_launch_id:
   case nir_intrinsic_load_ray_instance_custom_index:
   case nir_intrinsic_load_ray_geometry_index:
   case nir_intrinsic_load_ray_world_direction:
   case nir_intrinsic_load_ray_world_origin:
   case nir_intrinsic_load_ray_object_origin:
   case nir_intrinsic_load_ray_object_direction:
   case nir_intrinsic_load_ray_t_min:
   case nir_intrinsic_load_ray_t_max:
   case nir_intrinsic_load_ray_object_to_world:
   case nir_intrinsic_load_ray_world_to_object:
   case nir_intrinsic_load_ray_hit_kind:
   case nir_intrinsic_load_ray_flags:
   case nir_intrinsic_load_cull_mask:
   case nir_intrinsic_load_sysval_nv:
   case nir_intrinsic_emit_vertex_nv:
   case nir_intrinsic_end_primitive_nv:
   case nir_intrinsic_report_ray_intersection:
   case nir_intrinsic_rq_proceed:
   case nir_intrinsic_rq_load:
   case nir_intrinsic_load_ray_triangle_vertex_positions:
   case nir_intrinsic_cmat_extract:
   case nir_intrinsic_cmat_muladd_amd:
   case nir_intrinsic_dpas_intel:
   case nir_intrinsic_isberd_nv:
   case nir_intrinsic_al2p_nv:
   case nir_intrinsic_ald_nv:
   case nir_intrinsic_ipa_nv:
   case nir_intrinsic_ldtram_nv:
   case nir_intrinsic_printf:
      is_divergent = true;
      break;

   default:
#ifdef NDEBUG
      is_divergent = true;
      break;
#else
      nir_print_instr(&instr->instr, stderr);
      unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
#endif
   }

   instr->def.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_tex(nir_tex_instr *instr)
{
   if (instr->def.divergent)
      return false;

   bool is_divergent = false;

   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_sampler_deref:
      case nir_tex_src_sampler_handle:
      case nir_tex_src_sampler_offset:
         is_divergent |= instr->src[i].src.ssa->divergent &&
                         instr->sampler_non_uniform;
         break;
      case nir_tex_src_texture_deref:
      case nir_tex_src_texture_handle:
      case nir_tex_src_texture_offset:
         is_divergent |= instr->src[i].src.ssa->divergent &&
                         instr->texture_non_uniform;
         break;
      default:
         is_divergent |= instr->src[i].src.ssa->divergent;
         break;
      }
   }

   instr->def.divergent = is_divergent;
   return is_divergent;
}

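/* Illustrative sketch for visit_tex() above (GLSL-level, hypothetical names):
 *
 *    vec4 v = texture(samplers[idx], uv);   // divergent uv => divergent v
 *
 * A divergent idx only taints the result if the sampler/texture source was
 * tagged non-uniform (e.g. via nonuniformEXT(idx)); otherwise descriptor
 * indices are assumed to be dynamically uniform, as the APIs require.
 */
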
static bool
visit_load_const(nir_load_const_instr *instr)
{
   return false;
}

static bool
visit_ssa_undef(nir_undef_instr *instr)
{
   return false;
}

static bool
nir_variable_mode_is_uniform(nir_variable_mode mode)
{
   switch (mode) {
   case nir_var_uniform:
   case nir_var_mem_ubo:
   case nir_var_mem_ssbo:
   case nir_var_mem_shared:
   case nir_var_mem_task_payload:
   case nir_var_mem_global:
   case nir_var_image:
      return true;
   default:
      return false;
   }
}

static bool
nir_variable_is_uniform(nir_shader *shader, nir_variable *var)
{
   if (nir_variable_mode_is_uniform(var->data.mode))
      return true;

   nir_divergence_options options = shader->options->divergence_analysis_options;
   gl_shader_stage stage = shader->info.stage;

   if (stage == MESA_SHADER_FRAGMENT &&
       (options & nir_divergence_single_prim_per_subgroup) &&
       var->data.mode == nir_var_shader_in &&
       var->data.interpolation == INTERP_MODE_FLAT)
      return true;

   if (stage == MESA_SHADER_TESS_CTRL &&
       (options & nir_divergence_single_patch_per_tcs_subgroup) &&
       var->data.mode == nir_var_shader_out && var->data.patch)
      return true;

   if (stage == MESA_SHADER_TESS_EVAL &&
       (options & nir_divergence_single_patch_per_tes_subgroup) &&
       var->data.mode == nir_var_shader_in && var->data.patch)
      return true;

   return false;
}

static bool
visit_deref(nir_shader *shader, nir_deref_instr *deref)
{
   if (deref->def.divergent)
      return false;

   bool is_divergent = false;
   switch (deref->deref_type) {
   case nir_deref_type_var:
      is_divergent = !nir_variable_is_uniform(shader, deref->var);
      break;
   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      is_divergent = deref->arr.index.ssa->divergent;
      FALLTHROUGH;
   case nir_deref_type_struct:
   case nir_deref_type_array_wildcard:
      is_divergent |= deref->parent.ssa->divergent;
      break;
   case nir_deref_type_cast:
      /* Casts have no variable; use the mode bitmask instead. */
      is_divergent = !nir_variable_mode_is_uniform(deref->modes) ||
                     deref->parent.ssa->divergent;
      break;
   }

   deref->def.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_jump(nir_jump_instr *jump, struct divergence_state *state)
{
   switch (jump->type) {
   case nir_jump_continue:
      if (state->divergent_loop_continue)
         return false;
      if (state->divergent_loop_cf)
         state->divergent_loop_continue = true;
      return state->divergent_loop_continue;
   case nir_jump_break:
      if (state->divergent_loop_break)
         return false;
      if (state->divergent_loop_cf)
         state->divergent_loop_break = true;
      return state->divergent_loop_break;
   case nir_jump_halt:
      /* This totally kills invocations so it doesn't add divergence */
      break;
   case nir_jump_return:
      unreachable("NIR divergence analysis: Unsupported return instruction.");
      break;
   case nir_jump_goto:
   case nir_jump_goto_if:
      unreachable("NIR divergence analysis: Unsupported goto/goto_if instruction.");
      break;
   }
   return false;
}

static bool
set_ssa_def_not_divergent(nir_def *def, UNUSED void *_state)
{
   def->divergent = false;
   return true;
}

static bool
update_instr_divergence(nir_shader *shader, nir_instr *instr,
                        bool vertex_divergence)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return visit_alu(nir_instr_as_alu(instr));
   case nir_instr_type_intrinsic:
      return visit_intrinsic(shader, nir_instr_as_intrinsic(instr),
                             vertex_divergence);
   case nir_instr_type_tex:
      return visit_tex(nir_instr_as_tex(instr));
   case nir_instr_type_load_const:
      return visit_load_const(nir_instr_as_load_const(instr));
   case nir_instr_type_undef:
      return visit_ssa_undef(nir_instr_as_undef(instr));
   case nir_instr_type_deref:
      return visit_deref(shader, nir_instr_as_deref(instr));
   case nir_instr_type_jump:
   case nir_instr_type_phi:
   case nir_instr_type_call:
   case nir_instr_type_parallel_copy:
   default:
      unreachable("NIR divergence analysis: Unsupported instruction type.");
   }
}

static bool
visit_block(nir_block *block, struct divergence_state *state)
{
   bool has_changed = false;

   nir_foreach_instr(instr, block) {
      /* phis are handled when processing the branches */
      if (instr->type == nir_instr_type_phi)
         continue;

      if (state->first_visit)
         nir_foreach_def(instr, set_ssa_def_not_divergent, NULL);

      if (instr->type == nir_instr_type_jump) {
         has_changed |= visit_jump(nir_instr_as_jump(instr), state);
      } else {
         has_changed |= update_instr_divergence(state->shader, instr,
                                                state->vertex_divergence);
      }
   }

   return has_changed;
}

/* There are 3 types of phi instructions:
 * (1) gamma: represent the joining point of different paths
 *     created by an “if-then-else” branch.
 *     The resulting value is divergent if the branch condition
 *     or any of the source values is divergent. */
static bool
visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
{
   if (phi->def.divergent)
      return false;

   unsigned defined_srcs = 0;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->def.divergent = true;
         return true;
      }
      if (src->src.ssa->parent_instr->type != nir_instr_type_undef) {
         defined_srcs++;
      }
   }

   /* if the condition is divergent and at least two sources are defined,
    * the definition is divergent */
   if (defined_srcs > 1 && if_cond_divergent) {
      phi->def.divergent = true;
      return true;
   }

   return false;
}

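/* Gamma example (illustrative):
 *
 *    if (cond) { a1 = 1; } else { a2 = 2; }
 *    a = phi(a1, a2)
 *
 * The phi is divergent if cond is divergent and more than one source is
 * actually defined, because invocations then arrive from both sides with
 * different values. If all but one source is undef, the phi can stay
 * uniform even for a divergent cond.
 */
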
/* There are 3 types of phi instructions:
 * (2) mu: these only exist at loop headers and merge the initial and
 *     loop-carried values.
 *     The resulting value is divergent if any source value is divergent,
 *     or if a divergent loop continue condition is associated with a
 *     different ssa-def. */
static bool
visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
{
   if (phi->def.divergent)
      return false;

   nir_def *same = NULL;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->def.divergent = true;
         return true;
      }
      /* if this loop is uniform, we're done here */
      if (!divergent_continue)
         continue;
      /* skip the loop preheader */
      if (src->pred == preheader)
         continue;
      /* skip undef values */
      if (nir_src_is_undef(src->src))
         continue;

      /* check if all loop-carried values are from the same ssa-def */
      if (!same)
         same = src->src.ssa;
      else if (same != src->src.ssa) {
         phi->def.divergent = true;
         return true;
      }
   }

   return false;
}

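/* Mu example (illustrative):
 *
 *    loop {
 *       x = phi(x0 from the preheader, x1 from a back edge)
 *       ...
 *    }
 *
 * x is divergent if x0 or x1 is divergent, or if a divergent continue lets
 * different back-edge definitions reach the header, because invocations may
 * then re-enter the header carrying values from different iterations.
 */
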
/* There are 3 types of phi instructions:
 * (3) eta: represent values that leave a loop.
 *     The resulting value is divergent if the source value is divergent
 *     or any loop exit condition is divergent for a value which is
 *     not loop-invariant.
 *     (note: there should be no phi for loop-invariant variables.) */
static bool
visit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
{
   if (phi->def.divergent)
      return false;

   if (divergent_break) {
      phi->def.divergent = true;
      return true;
   }

   /* if any source value is divergent, the resulting value is divergent */
   nir_foreach_phi_src(src, phi) {
      if (src->src.ssa->divergent) {
         phi->def.divergent = true;
         return true;
      }
   }

   return false;
}

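/* Eta example (illustrative, LCSSA form):
 *
 *    loop {
 *       if (divergent_cond) break;
 *       x = ...;
 *    }
 *    x_exit = phi(x)
 *
 * With a divergent break, invocations leave the loop in different
 * iterations, so every value leaving the loop becomes divergent.
 */
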
static bool
visit_if(nir_if *if_stmt, struct divergence_state *state)
{
   bool progress = false;

   struct divergence_state then_state = *state;
   then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->then_list, &then_state);

   struct divergence_state else_state = *state;
   else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->else_list, &else_state);

   /* handle phis after the IF */
   nir_foreach_phi(phi, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
      if (state->first_visit)
         phi->def.divergent = false;
      progress |= visit_if_merge_phi(phi, if_stmt->condition.ssa->divergent);
   }

   /* join loop divergence information from both branch legs */
   state->divergent_loop_continue |= then_state.divergent_loop_continue ||
                                     else_state.divergent_loop_continue;
   state->divergent_loop_break |= then_state.divergent_loop_break ||
                                  else_state.divergent_loop_break;

   /* A divergent continue makes succeeding loop CF divergent:
    * not all loop-active invocations participate in the remaining loop body,
    * which means a following break might be taken only by some invocations. */
   state->divergent_loop_cf |= state->divergent_loop_continue;

   return progress;
}

static bool
visit_loop(nir_loop *loop, struct divergence_state *state)
{
   assert(!nir_loop_has_continue_construct(loop));
   bool progress = false;
   nir_block *loop_header = nir_loop_first_block(loop);
   nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);

   /* handle loop header phis first: we have no knowledge yet about
    * the loop's control flow or any loop-carried sources. */
   nir_foreach_phi(phi, loop_header) {
      if (!state->first_visit && phi->def.divergent)
         continue;

      nir_foreach_phi_src(src, phi) {
         if (src->pred == loop_preheader) {
            phi->def.divergent = src->src.ssa->divergent;
            break;
         }
      }
      progress |= phi->def.divergent;
   }

   /* setup loop state */
   struct divergence_state loop_state = *state;
   loop_state.divergent_loop_cf = false;
   loop_state.divergent_loop_continue = false;
   loop_state.divergent_loop_break = false;

   /* process loop body until no further changes are made */
   bool repeat;
   do {
      progress |= visit_cf_list(&loop->body, &loop_state);
      repeat = false;

      /* revisit loop header phis to see if something has changed */
      nir_foreach_phi(phi, loop_header) {
         repeat |= visit_loop_header_phi(phi, loop_preheader,
                                         loop_state.divergent_loop_continue);
      }

      loop_state.divergent_loop_cf = false;
      loop_state.first_visit = false;
   } while (repeat);

   /* handle phis after the loop */
   nir_foreach_phi(phi, nir_cf_node_cf_tree_next(&loop->cf_node)) {
      if (state->first_visit)
         phi->def.divergent = false;
      progress |= visit_loop_exit_phi(phi, loop_state.divergent_loop_break);
   }

   loop->divergent = (loop_state.divergent_loop_break || loop_state.divergent_loop_continue);

   return progress;
}

static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state)
{
   bool has_changed = false;

   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         has_changed |= visit_block(nir_cf_node_as_block(node), state);
         break;
      case nir_cf_node_if:
         has_changed |= visit_if(nir_cf_node_as_if(node), state);
         break;
      case nir_cf_node_loop:
         has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
         break;
      case nir_cf_node_function:
         unreachable("NIR divergence analysis: Unsupported cf_node type.");
      }
   }

   return has_changed;
}

void
nir_divergence_analysis(nir_shader *shader)
{
   shader->info.divergence_analysis_run = true;

   struct divergence_state state = {
      .stage = shader->info.stage,
      .shader = shader,
      .divergent_loop_cf = false,
      .divergent_loop_continue = false,
      .divergent_loop_break = false,
      .first_visit = true,
   };

   visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
}

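/* Typical driver usage (a sketch, not mandated by this file):
 *
 *    nir_divergence_analysis(shader);
 *    ...
 *    if (some_def->divergent)
 *       ; // e.g. assign a per-lane (vector) register instead of a scalar one
 *
 * Results are only valid until the next CF or SSA modification; afterwards,
 * re-run the pass or patch single instructions with
 * nir_update_instr_divergence() below.
 */
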
/* Compute divergence between vertices of the same primitive. This uses
 * the same divergent field in nir_def and nir_loop as the regular divergence
 * pass.
 */
void
nir_vertex_divergence_analysis(nir_shader *shader)
{
   shader->info.divergence_analysis_run = false;

   struct divergence_state state = {
      .stage = shader->info.stage,
      .shader = shader,
      .vertex_divergence = true,
      .first_visit = true,
   };

   visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
}

bool
nir_update_instr_divergence(nir_shader *shader, nir_instr *instr)
{
   nir_foreach_def(instr, set_ssa_def_not_divergent, NULL);

   if (instr->type == nir_instr_type_phi) {
      nir_cf_node *prev = nir_cf_node_prev(&instr->block->cf_node);
      /* can only update gamma/if phis */
      if (!prev || prev->type != nir_cf_node_if)
         return false;

      nir_if *nif = nir_cf_node_as_if(prev);

      visit_if_merge_phi(nir_instr_as_phi(instr), nir_src_is_divergent(nif->condition));
      return true;
   }

   update_instr_divergence(shader, instr, false);
   return true;
}

bool
nir_has_divergent_loop(nir_shader *shader)
{
   bool divergent_loop = false;
   nir_function_impl *func = nir_shader_get_entrypoint(shader);

   foreach_list_typed(nir_cf_node, node, node, &func->body) {
      if (node->type == nir_cf_node_loop && nir_cf_node_as_loop(node)->divergent) {
         divergent_loop = true;
         break;
      }
   }

   return divergent_loop;
}