/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "nir.h"

/* This pass computes, for each ssa definition, whether it is uniform,
 * i.e. whether it has the same value for all invocations of the group.
 *
 * This divergence analysis pass expects the shader to be in LCSSA form.
 *
 * The algorithm implements "The Simple Divergence Analysis" from
 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
 * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS),
 * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
 */
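
/* Illustrative sketch (not part of the pass itself): divergence propagates
 * from sources to definitions. With a subgroup-uniform push constant and
 * the per-lane invocation index:
 *
 *    uint u  = load_push_constant(...);     // uniform: same in every lane
 *    uint id = load_subgroup_invocation();  // divergent: differs per lane
 *    uint a  = u * 2;                       // uniform (all sources uniform)
 *    uint b  = u + id;                      // divergent (one source divergent)
 */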

struct divergence_state {
   const gl_shader_stage stage;
   nir_shader *shader;

   /* Whether the caller requested vertex divergence (meaning between vertices
    * of the same primitive) instead of subgroup invocation divergence
    * (between invocations of the same subgroup). For example, patch input
    * loads are always convergent, while subgroup intrinsics are divergent
    * because vertices of the same primitive can be processed by different
    * subgroups.
    */
   bool vertex_divergence;

   /** current control flow state */
   /* True if some loop-active invocations might take a different control-flow path.
    * A divergent break does not cause subsequent control-flow to be considered
    * divergent because those invocations are no longer active in the loop.
    * For a divergent if, both sides are considered divergent flow because
    * the other side is still loop-active. */
   bool divergent_loop_cf;
   /* True if a divergent continue happened since the loop header */
   bool divergent_loop_continue;
   /* True if a divergent break happened since the loop header */
   bool divergent_loop_break;

   /* True if we visit the block for the first time */
   bool first_visit;
};
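
/* Example of the loop-CF rules above (illustrative sketch):
 *
 *    loop {
 *       if (divergent_cond)
 *          break;      // divergent break: sets divergent_loop_break
 *       x = ...;       // still non-divergent CF: lanes that broke out
 *    }                 // are no longer loop-active
 *
 * A divergent `continue` in the same position would instead set
 * divergent_loop_cf for the rest of the body, because continuing lanes
 * stay loop-active.
 */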

static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state);

static bool
visit_alu(nir_alu_instr *instr)
{
   if (instr->def.divergent)
      return false;

   unsigned num_src = nir_op_infos[instr->op].num_inputs;

   for (unsigned i = 0; i < num_src; i++) {
      if (instr->src[i].src.ssa->divergent) {
         instr->def.divergent = true;
         return true;
      }
   }

   return false;
}

static bool
visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr,
                bool vertex_divergence)
{
   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
      return false;

   if (instr->def.divergent)
      return false;

   nir_divergence_options options = shader->options->divergence_analysis_options;
   gl_shader_stage stage = shader->info.stage;
   bool is_divergent = false;
   switch (instr->intrinsic) {
   case nir_intrinsic_shader_clock:
   case nir_intrinsic_ballot:
   case nir_intrinsic_ballot_relaxed:
   case nir_intrinsic_as_uniform:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_first_invocation:
   case nir_intrinsic_last_invocation:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_load_subgroup_id:
      /* VS/TES/GS invocations of the same primitive can be in different
       * subgroups, so subgroup ops are always divergent between vertices of
       * the same primitive.
       */
      is_divergent = vertex_divergence;
      break;

   /* Intrinsics which are always uniform */
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_num_subgroups:
   case nir_intrinsic_load_ray_launch_size:
   case nir_intrinsic_load_sbt_base_amd:
   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_viewport_scale:
   case nir_intrinsic_load_user_clip_plane:
   case nir_intrinsic_load_viewport_x_scale:
   case nir_intrinsic_load_viewport_y_scale:
   case nir_intrinsic_load_viewport_z_scale:
   case nir_intrinsic_load_viewport_offset:
   case nir_intrinsic_load_viewport_x_offset:
   case nir_intrinsic_load_viewport_y_offset:
   case nir_intrinsic_load_viewport_z_offset:
   case nir_intrinsic_load_viewport_xy_scale_and_offset:
   case nir_intrinsic_load_blend_const_color_a_float:
   case nir_intrinsic_load_blend_const_color_b_float:
   case nir_intrinsic_load_blend_const_color_g_float:
   case nir_intrinsic_load_blend_const_color_r_float:
   case nir_intrinsic_load_blend_const_color_rgba:
   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
   case nir_intrinsic_load_line_width:
   case nir_intrinsic_load_aa_line_width:
   case nir_intrinsic_load_xfb_address:
   case nir_intrinsic_load_num_vertices:
   case nir_intrinsic_load_fb_layers_v3d:
   case nir_intrinsic_load_tcs_num_patches_amd:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_ring_tess_factors_amd:
   case nir_intrinsic_load_ring_tess_offchip_amd:
   case nir_intrinsic_load_ring_tess_factors_offset_amd:
   case nir_intrinsic_load_ring_tess_offchip_offset_amd:
   case nir_intrinsic_load_ring_mesh_scratch_amd:
   case nir_intrinsic_load_ring_mesh_scratch_offset_amd:
   case nir_intrinsic_load_ring_esgs_amd:
   case nir_intrinsic_load_ring_es2gs_offset_amd:
   case nir_intrinsic_load_ring_task_draw_amd:
   case nir_intrinsic_load_ring_task_payload_amd:
   case nir_intrinsic_load_sample_positions_amd:
   case nir_intrinsic_load_rasterization_samples_amd:
   case nir_intrinsic_load_ring_gsvs_amd:
   case nir_intrinsic_load_ring_gs2vs_offset_amd:
   case nir_intrinsic_load_streamout_config_amd:
   case nir_intrinsic_load_streamout_write_index_amd:
   case nir_intrinsic_load_streamout_offset_amd:
   case nir_intrinsic_load_task_ring_entry_amd:
   case nir_intrinsic_load_ring_attr_amd:
   case nir_intrinsic_load_ring_attr_offset_amd:
   case nir_intrinsic_load_provoking_vtx_amd:
   case nir_intrinsic_load_sample_positions_pan:
   case nir_intrinsic_load_workgroup_num_input_vertices_amd:
   case nir_intrinsic_load_workgroup_num_input_primitives_amd:
   case nir_intrinsic_load_pipeline_stat_query_enabled_amd:
   case nir_intrinsic_load_prim_gen_query_enabled_amd:
   case nir_intrinsic_load_prim_xfb_query_enabled_amd:
   case nir_intrinsic_load_merged_wave_info_amd:
   case nir_intrinsic_load_clamp_vertex_color_amd:
   case nir_intrinsic_load_cull_front_face_enabled_amd:
   case nir_intrinsic_load_cull_back_face_enabled_amd:
   case nir_intrinsic_load_cull_ccw_amd:
   case nir_intrinsic_load_cull_small_primitives_enabled_amd:
   case nir_intrinsic_load_cull_any_enabled_amd:
   case nir_intrinsic_load_cull_small_prim_precision_amd:
   case nir_intrinsic_load_user_data_amd:
   case nir_intrinsic_load_force_vrs_rates_amd:
   case nir_intrinsic_load_tess_level_inner_default:
   case nir_intrinsic_load_tess_level_outer_default:
   case nir_intrinsic_load_scalar_arg_amd:
   case nir_intrinsic_load_smem_amd:
   case nir_intrinsic_load_rt_dynamic_callable_stack_base_amd:
   case nir_intrinsic_load_resume_shader_address_amd:
   case nir_intrinsic_load_global_const_block_intel:
   case nir_intrinsic_load_reloc_const_intel:
   case nir_intrinsic_load_btd_global_arg_addr_intel:
   case nir_intrinsic_load_btd_local_arg_addr_intel:
   case nir_intrinsic_load_mesh_inline_data_intel:
   case nir_intrinsic_load_ray_num_dss_rt_stacks_intel:
   case nir_intrinsic_load_lshs_vertex_stride_amd:
   case nir_intrinsic_load_esgs_vertex_stride_amd:
   case nir_intrinsic_load_hs_out_patch_data_offset_amd:
   case nir_intrinsic_load_clip_half_line_width_amd:
   case nir_intrinsic_load_num_vertices_per_primitive_amd:
   case nir_intrinsic_load_streamout_buffer_amd:
   case nir_intrinsic_load_ordered_id_amd:
   case nir_intrinsic_load_gs_wave_id_amd:
   case nir_intrinsic_load_provoking_vtx_in_prim_amd:
   case nir_intrinsic_load_lds_ngg_scratch_base_amd:
   case nir_intrinsic_load_lds_ngg_gs_out_vertex_base_amd:
   case nir_intrinsic_load_btd_shader_type_intel:
   case nir_intrinsic_load_base_workgroup_id:
   case nir_intrinsic_load_alpha_reference_amd:
   case nir_intrinsic_load_ubo_uniform_block_intel:
   case nir_intrinsic_load_ssbo_uniform_block_intel:
   case nir_intrinsic_load_shared_uniform_block_intel:
   case nir_intrinsic_load_barycentric_optimize_amd:
   case nir_intrinsic_load_poly_line_smooth_enabled:
   case nir_intrinsic_load_rasterization_primitive_amd:
   case nir_intrinsic_load_global_constant_uniform_block_intel:
   case nir_intrinsic_cmat_length:
      is_divergent = false;
      break;

   /* This is divergent because it specifically loads sequential values into
    * successive SIMD lanes.
    */
   case nir_intrinsic_load_global_block_intel:
      is_divergent = true;
      break;

   case nir_intrinsic_decl_reg:
      is_divergent = nir_intrinsic_divergent(instr);
      break;

   /* Intrinsics with divergence depending on shader stage and hardware */
   case nir_intrinsic_load_shader_record_ptr:
      is_divergent = !(options & nir_divergence_shader_record_ptr_uniform);
      break;
   case nir_intrinsic_load_frag_shading_rate:
      is_divergent = !(options & nir_divergence_single_frag_shading_rate_per_subgroup);
      break;
   case nir_intrinsic_load_input:
      is_divergent = instr->src[0].ssa->divergent;

      if (stage == MESA_SHADER_FRAGMENT) {
         is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      } else if (stage == MESA_SHADER_TESS_EVAL) {
         /* Patch input loads are uniform between vertices of the same primitive. */
         if (vertex_divergence)
            is_divergent = false;
         else
            is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      } else {
         is_divergent = true;
      }
      break;
   case nir_intrinsic_load_per_vertex_input:
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent;
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_input_vertex:
      is_divergent = instr->src[1].ssa->divergent;
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_output:
      is_divergent = instr->src[0].ssa->divergent;
      switch (stage) {
      case MESA_SHADER_TESS_CTRL:
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
         break;
      case MESA_SHADER_FRAGMENT:
         is_divergent = true;
         break;
      case MESA_SHADER_TASK:
      case MESA_SHADER_MESH:
         /* NV_mesh_shader only (EXT_mesh_shader does not allow loading outputs).
          * Divergent if src[0] is, so nothing else to do.
          */
         break;
      default:
         unreachable("Invalid stage for load_output");
      }
      break;
   case nir_intrinsic_load_per_vertex_output:
      /* TCS and NV_mesh_shader only (EXT_mesh_shader does not allow loading outputs). */
      assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH);
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent ||
                     (stage == MESA_SHADER_TESS_CTRL &&
                      !(options & nir_divergence_single_patch_per_tcs_subgroup));
      break;
   case nir_intrinsic_load_per_primitive_output:
      /* NV_mesh_shader only (EXT_mesh_shader does not allow loading outputs). */
      assert(stage == MESA_SHADER_MESH);
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent;
      break;
   case nir_intrinsic_load_layer_id:
   case nir_intrinsic_load_front_face:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_view_index:
      assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
      if (options & nir_divergence_view_index_uniform)
         is_divergent = false;
      else if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_fs_input_interp_deltas:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = instr->src[0].ssa->divergent;
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_primitive_id:
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else if (stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_VERTEX)
         is_divergent = true;
      else if (stage == MESA_SHADER_ANY_HIT ||
               stage == MESA_SHADER_CLOSEST_HIT ||
               stage == MESA_SHADER_INTERSECTION)
         is_divergent = true;
      else
         unreachable("Invalid stage for load_primitive_id");
      break;
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer:
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         unreachable("Invalid stage for load_tess_level_*");
      break;

   case nir_intrinsic_load_workgroup_index:
   case nir_intrinsic_load_workgroup_id:
   case nir_intrinsic_load_workgroup_id_zero_base:
      assert(gl_shader_stage_uses_workgroup(stage));
      if (stage == MESA_SHADER_COMPUTE)
         is_divergent |= (options & nir_divergence_multiple_workgroup_per_compute_subgroup);
      break;

   /* Clustered reductions are uniform if cluster_size == subgroup_size or
    * if the source is uniform and the operation is invariant.
    * Inclusive scans are uniform only if the source is uniform and the
    * operation is invariant.
    */
   case nir_intrinsic_reduce:
      if (nir_intrinsic_cluster_size(instr) == 0) {
         /* Cluster size of 0 means the subgroup size.
          * This is uniform within a subgroup, but divergent between
          * vertices of the same primitive because they may be in
          * different subgroups.
          */
         is_divergent = vertex_divergence;
         break;
      }
      FALLTHROUGH;
   case nir_intrinsic_inclusive_scan: {
      nir_op op = nir_intrinsic_reduction_op(instr);
      is_divergent = instr->src[0].ssa->divergent || vertex_divergence;
      if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
          op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
          op != nir_op_iand && op != nir_op_ior)
         is_divergent = true;
      break;
   }
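
   /* Example of the rule above (illustrative sketch, not upstream text):
    * reduce(op=umin, x, cluster_size=4) with uniform x stays uniform, since
    * umin over identical values does not depend on which lanes are active.
    * inclusive_scan(op=iadd, x) with uniform x is still divergent: every
    * lane sums a different-length prefix, so the partial sums differ.
    */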

   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent;
      break;

   case nir_intrinsic_get_ssbo_size:
   case nir_intrinsic_deref_buffer_array_length:
      is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
      break;

   case nir_intrinsic_image_samples_identical:
   case nir_intrinsic_image_deref_samples_identical:
   case nir_intrinsic_bindless_image_samples_identical:
   case nir_intrinsic_image_fragment_mask_load_amd:
   case nir_intrinsic_image_deref_fragment_mask_load_amd:
   case nir_intrinsic_bindless_image_fragment_mask_load_amd:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent;
      break;

   case nir_intrinsic_image_texel_address:
   case nir_intrinsic_image_deref_texel_address:
   case nir_intrinsic_bindless_image_texel_address:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent || instr->src[2].ssa->divergent;
      break;

   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_sparse_load:
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_bindless_image_sparse_load:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent;
      break;

   case nir_intrinsic_optimization_barrier_vgpr_amd:
      is_divergent = instr->src[0].ssa->divergent;
      break;

   /* Intrinsics with divergence depending on sources */
   case nir_intrinsic_convert_alu_types:
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_rotate:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_quad_vote_any:
   case nir_intrinsic_quad_vote_all:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_shared2_amd:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_2x32:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_global_amd:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_sample_pos_from_id:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_load_task_payload:
   case nir_intrinsic_load_buffer_amd:
   case nir_intrinsic_load_typed_buffer_amd:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_image_descriptor_amd:
   case nir_intrinsic_image_deref_descriptor_amd:
   case nir_intrinsic_bindless_image_descriptor_amd:
   case nir_intrinsic_strict_wqm_coord_amd:
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_vulkan_resource_index:
   case nir_intrinsic_vulkan_resource_reindex:
   case nir_intrinsic_load_vulkan_descriptor:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_read_deref:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd:
   case nir_intrinsic_is_sparse_texels_resident:
   case nir_intrinsic_sparse_residency_code_and:
   case nir_intrinsic_bvh64_intersect_ray_amd:
   case nir_intrinsic_image_deref_load_param_intel:
   case nir_intrinsic_image_load_raw_intel:
   case nir_intrinsic_get_ubo_size:
   case nir_intrinsic_load_ssbo_address:
   case nir_intrinsic_load_desc_set_address_intel:
   case nir_intrinsic_load_desc_set_dynamic_index_intel:
   case nir_intrinsic_load_global_constant_bounded:
   case nir_intrinsic_load_global_constant_offset:
   case nir_intrinsic_resource_intel:
   case nir_intrinsic_load_reg:
   case nir_intrinsic_load_reg_indirect: {
      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
      for (unsigned i = 0; i < num_srcs; i++) {
         if (instr->src[i].ssa->divergent) {
            is_divergent = true;
            break;
         }
      }
      break;
   }

   case nir_intrinsic_shuffle:
      is_divergent = instr->src[0].ssa->divergent &&
                     instr->src[1].ssa->divergent;
      break;

   /* Intrinsics which are always divergent */
   case nir_intrinsic_inverse_ballot:
   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1:
   case nir_intrinsic_load_param:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_point_coord_maybe_flipped:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_sample:
   case nir_intrinsic_load_barycentric_model:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_load_barycentric_at_offset_nv:
   case nir_intrinsic_load_barycentric_coord_pixel:
   case nir_intrinsic_load_barycentric_coord_centroid:
   case nir_intrinsic_load_barycentric_coord_sample:
   case nir_intrinsic_load_barycentric_coord_at_sample:
   case nir_intrinsic_load_barycentric_coord_at_offset:
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_vertex:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_tess_coord_xy:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_line_coord:
   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_frag_coord_zw:
   case nir_intrinsic_load_pixel_coord:
   case nir_intrinsic_load_fully_covered:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_pos_or_center:
   case nir_intrinsic_load_vertex_id_zero_base:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_global_invocation_id_zero_base:
   case nir_intrinsic_load_global_invocation_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_is_helper_invocation:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_deref_atomic:
   case nir_intrinsic_deref_atomic_swap:
   case nir_intrinsic_ssbo_atomic:
   case nir_intrinsic_ssbo_atomic_swap:
   case nir_intrinsic_image_deref_atomic:
   case nir_intrinsic_image_deref_atomic_swap:
   case nir_intrinsic_image_atomic:
   case nir_intrinsic_image_atomic_swap:
   case nir_intrinsic_bindless_image_atomic:
   case nir_intrinsic_bindless_image_atomic_swap:
   case nir_intrinsic_shared_atomic:
   case nir_intrinsic_shared_atomic_swap:
   case nir_intrinsic_task_payload_atomic:
   case nir_intrinsic_task_payload_atomic_swap:
   case nir_intrinsic_global_atomic:
   case nir_intrinsic_global_atomic_swap:
   case nir_intrinsic_global_atomic_amd:
   case nir_intrinsic_global_atomic_swap_amd:
   case nir_intrinsic_global_atomic_2x32:
   case nir_intrinsic_global_atomic_swap_2x32:
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_add_deref:
   case nir_intrinsic_atomic_counter_min_deref:
   case nir_intrinsic_atomic_counter_max_deref:
   case nir_intrinsic_atomic_counter_and_deref:
   case nir_intrinsic_atomic_counter_or_deref:
   case nir_intrinsic_atomic_counter_xor_deref:
   case nir_intrinsic_atomic_counter_inc_deref:
   case nir_intrinsic_atomic_counter_pre_dec_deref:
   case nir_intrinsic_atomic_counter_post_dec_deref:
   case nir_intrinsic_atomic_counter_exchange_deref:
   case nir_intrinsic_atomic_counter_comp_swap_deref:
   case nir_intrinsic_exclusive_scan:
   case nir_intrinsic_ballot_bit_count_exclusive:
   case nir_intrinsic_ballot_bit_count_inclusive:
   case nir_intrinsic_write_invocation_amd:
   case nir_intrinsic_mbcnt_amd:
   case nir_intrinsic_lane_permute_16_amd:
   case nir_intrinsic_elect:
   case nir_intrinsic_load_tlb_color_v3d:
   case nir_intrinsic_load_tess_rel_patch_id_amd:
   case nir_intrinsic_load_gs_vertex_offset_amd:
   case nir_intrinsic_is_subgroup_invocation_lt_amd:
   case nir_intrinsic_load_packed_passthrough_primitive_amd:
   case nir_intrinsic_load_initial_edgeflags_amd:
   case nir_intrinsic_gds_atomic_add_amd:
   case nir_intrinsic_load_rt_arg_scratch_offset_amd:
   case nir_intrinsic_load_intersection_opaque_amd:
   case nir_intrinsic_load_vector_arg_amd:
   case nir_intrinsic_load_btd_stack_id_intel:
   case nir_intrinsic_load_topology_id_intel:
   case nir_intrinsic_load_scratch_base_ptr:
   case nir_intrinsic_ordered_xfb_counter_add_amd:
   case nir_intrinsic_xfb_counter_sub_amd:
   case nir_intrinsic_load_stack:
   case nir_intrinsic_load_ray_launch_id:
   case nir_intrinsic_load_ray_instance_custom_index:
   case nir_intrinsic_load_ray_geometry_index:
   case nir_intrinsic_load_ray_world_direction:
   case nir_intrinsic_load_ray_world_origin:
   case nir_intrinsic_load_ray_object_origin:
   case nir_intrinsic_load_ray_object_direction:
   case nir_intrinsic_load_ray_t_min:
   case nir_intrinsic_load_ray_t_max:
   case nir_intrinsic_load_ray_object_to_world:
   case nir_intrinsic_load_ray_world_to_object:
   case nir_intrinsic_load_ray_hit_kind:
   case nir_intrinsic_load_ray_flags:
   case nir_intrinsic_load_cull_mask:
   case nir_intrinsic_load_sysval_nv:
   case nir_intrinsic_emit_vertex_nv:
   case nir_intrinsic_end_primitive_nv:
   case nir_intrinsic_report_ray_intersection:
   case nir_intrinsic_rq_proceed:
   case nir_intrinsic_rq_load:
   case nir_intrinsic_load_ray_triangle_vertex_positions:
   case nir_intrinsic_cmat_extract:
   case nir_intrinsic_cmat_muladd_amd:
   case nir_intrinsic_dpas_intel:
   case nir_intrinsic_isberd_nv:
   case nir_intrinsic_al2p_nv:
   case nir_intrinsic_ald_nv:
   case nir_intrinsic_ipa_nv:
   case nir_intrinsic_ldtram_nv:
   case nir_intrinsic_printf:
      is_divergent = true;
      break;

   default:
#ifdef NDEBUG
      is_divergent = true;
      break;
#else
      nir_print_instr(&instr->instr, stderr);
      unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
#endif
   }

   instr->def.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_tex(nir_tex_instr *instr)
{
   if (instr->def.divergent)
      return false;

   bool is_divergent = false;

   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_sampler_deref:
      case nir_tex_src_sampler_handle:
      case nir_tex_src_sampler_offset:
         is_divergent |= instr->src[i].src.ssa->divergent &&
                         instr->sampler_non_uniform;
         break;
      case nir_tex_src_texture_deref:
      case nir_tex_src_texture_handle:
      case nir_tex_src_texture_offset:
         is_divergent |= instr->src[i].src.ssa->divergent &&
                         instr->texture_non_uniform;
         break;
      default:
         is_divergent |= instr->src[i].src.ssa->divergent;
         break;
      }
   }

   instr->def.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_load_const(nir_load_const_instr *instr)
{
   return false;
}

static bool
visit_ssa_undef(nir_undef_instr *instr)
{
   return false;
}

static bool
nir_variable_mode_is_uniform(nir_variable_mode mode)
{
   switch (mode) {
   case nir_var_uniform:
   case nir_var_mem_ubo:
   case nir_var_mem_ssbo:
   case nir_var_mem_shared:
   case nir_var_mem_task_payload:
   case nir_var_mem_global:
   case nir_var_image:
      return true;
   default:
      return false;
   }
}

static bool
nir_variable_is_uniform(nir_shader *shader, nir_variable *var)
{
   if (nir_variable_mode_is_uniform(var->data.mode))
      return true;

   nir_divergence_options options = shader->options->divergence_analysis_options;
   gl_shader_stage stage = shader->info.stage;

   if (stage == MESA_SHADER_FRAGMENT &&
       (options & nir_divergence_single_prim_per_subgroup) &&
       var->data.mode == nir_var_shader_in &&
       var->data.interpolation == INTERP_MODE_FLAT)
      return true;

   if (stage == MESA_SHADER_TESS_CTRL &&
       (options & nir_divergence_single_patch_per_tcs_subgroup) &&
       var->data.mode == nir_var_shader_out && var->data.patch)
      return true;

   if (stage == MESA_SHADER_TESS_EVAL &&
       (options & nir_divergence_single_patch_per_tes_subgroup) &&
       var->data.mode == nir_var_shader_in && var->data.patch)
      return true;

   return false;
}

static bool
visit_deref(nir_shader *shader, nir_deref_instr *deref)
{
   if (deref->def.divergent)
      return false;

   bool is_divergent = false;
   switch (deref->deref_type) {
   case nir_deref_type_var:
      is_divergent = !nir_variable_is_uniform(shader, deref->var);
      break;
   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      is_divergent = deref->arr.index.ssa->divergent;
      FALLTHROUGH;
   case nir_deref_type_struct:
   case nir_deref_type_array_wildcard:
      is_divergent |= deref->parent.ssa->divergent;
      break;
   case nir_deref_type_cast:
      is_divergent = !nir_variable_mode_is_uniform(deref->modes) ||
                     deref->parent.ssa->divergent;
      break;
   }

   deref->def.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_jump(nir_jump_instr *jump, struct divergence_state *state)
{
   switch (jump->type) {
   case nir_jump_continue:
      if (state->divergent_loop_continue)
         return false;
      if (state->divergent_loop_cf)
         state->divergent_loop_continue = true;
      return state->divergent_loop_continue;
   case nir_jump_break:
      if (state->divergent_loop_break)
         return false;
      if (state->divergent_loop_cf)
         state->divergent_loop_break = true;
      return state->divergent_loop_break;
   case nir_jump_halt:
      /* This totally kills invocations so it doesn't add divergence */
      break;
   case nir_jump_return:
      unreachable("NIR divergence analysis: Unsupported return instruction.");
      break;
   case nir_jump_goto:
   case nir_jump_goto_if:
      unreachable("NIR divergence analysis: Unsupported goto instruction.");
      break;
   }
   return false;
}

static bool
set_ssa_def_not_divergent(nir_def *def, UNUSED void *_state)
{
   def->divergent = false;
   return true;
}

static bool
update_instr_divergence(nir_shader *shader, nir_instr *instr,
                        bool vertex_divergence)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return visit_alu(nir_instr_as_alu(instr));
   case nir_instr_type_intrinsic:
      return visit_intrinsic(shader, nir_instr_as_intrinsic(instr),
                             vertex_divergence);
   case nir_instr_type_tex:
      return visit_tex(nir_instr_as_tex(instr));
   case nir_instr_type_load_const:
      return visit_load_const(nir_instr_as_load_const(instr));
   case nir_instr_type_undef:
      return visit_ssa_undef(nir_instr_as_undef(instr));
   case nir_instr_type_deref:
      return visit_deref(shader, nir_instr_as_deref(instr));
   case nir_instr_type_jump:
   case nir_instr_type_phi:
   case nir_instr_type_call:
   case nir_instr_type_parallel_copy:
   default:
      unreachable("NIR divergence analysis: Unsupported instruction type.");
   }
}

static bool
visit_block(nir_block *block, struct divergence_state *state)
{
   bool has_changed = false;

   nir_foreach_instr(instr, block) {
      /* phis are handled when processing the branches */
      if (instr->type == nir_instr_type_phi)
         continue;

      if (state->first_visit)
         nir_foreach_def(instr, set_ssa_def_not_divergent, NULL);

      if (instr->type == nir_instr_type_jump) {
         has_changed |= visit_jump(nir_instr_as_jump(instr), state);
      } else {
         has_changed |= update_instr_divergence(state->shader, instr,
                                                state->vertex_divergence);
      }
   }

   return has_changed;
}

/* There are 3 types of phi instructions:
 * (1) gamma: represent the joining point of different paths
 *     created by an “if-then-else” branch.
 *     The resulting value is divergent if the branch condition
 *     or any of the source values is divergent. */
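/* Sketch of the gamma rule (illustrative): for
 *
 *    if (c) { a = ...; } else { b = ...; }
 *    x = phi(a, b);
 *
 * x is divergent if a, b, or c is divergent. If only one source is actually
 * defined (the other is undef), a divergent c alone does not make x
 * divergent, which is why defined sources are counted below. */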
static bool
visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
{
   if (phi->def.divergent)
      return false;

   unsigned defined_srcs = 0;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->def.divergent = true;
         return true;
      }
      if (src->src.ssa->parent_instr->type != nir_instr_type_undef) {
         defined_srcs++;
      }
   }

   /* if the condition is divergent and at least two sources are defined,
    * the definition is divergent */
   if (defined_srcs > 1 && if_cond_divergent) {
      phi->def.divergent = true;
      return true;
   }

   return false;
}

/* There are 3 types of phi instructions:
 * (2) mu: which only exist at loop headers,
 *     merge initial and loop-carried values.
 *     The resulting value is divergent if any source value
 *     is divergent or a divergent loop continue condition
 *     is associated with a different ssa-def. */
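/* Sketch of the mu rule (illustrative): for
 *
 *    x1 = 0;
 *    loop {
 *       x2 = phi(x1 from preheader, x2 from continue, x3 from bottom);
 *       if (divergent_cond)
 *          continue;
 *       x3 = x2 + 1;
 *    }
 *
 * the divergent continue associates two different loop-carried ssa-defs
 * (x2 and x3) with the header phi, so x2 is divergent: lanes disagree on
 * how many increments they have executed, even though each individual
 * source is uniform. */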
static bool
visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
{
   if (phi->def.divergent)
      return false;

   nir_def *same = NULL;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->def.divergent = true;
         return true;
      }
      /* if this loop is uniform, we're done here */
      if (!divergent_continue)
         continue;
      /* skip the loop preheader */
      if (src->pred == preheader)
         continue;
      /* skip undef values */
      if (nir_src_is_undef(src->src))
         continue;

      /* check if all loop-carried values are from the same ssa-def */
      if (!same)
         same = src->src.ssa;
      else if (same != src->src.ssa) {
         phi->def.divergent = true;
         return true;
      }
   }

   return false;
}

/* There are 3 types of phi instructions:
 * (3) eta: represent values that leave a loop.
 *     The resulting value is divergent if the source value is divergent
 *     or any loop exit condition is divergent for a value which is
 *     not loop-invariant.
 *     (note: there should be no phi for loop-invariant variables.) */
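/* Sketch of the eta rule (illustrative): for
 *
 *    loop {
 *       x2 = ...;
 *       if (divergent_cond)
 *          break;
 *    }
 *    x3 = phi(x2);   // LCSSA phi at the loop exit
 *
 * x3 is divergent even if x2 is uniform within each iteration, because
 * lanes leave the loop in different iterations and therefore observe
 * different values of x2. */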
static bool
visit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
{
   if (phi->def.divergent)
      return false;

   if (divergent_break) {
      phi->def.divergent = true;
      return true;
   }

   /* if any source value is divergent, the resulting value is divergent */
   nir_foreach_phi_src(src, phi) {
      if (src->src.ssa->divergent) {
         phi->def.divergent = true;
         return true;
      }
   }

   return false;
}

static bool
visit_if(nir_if *if_stmt, struct divergence_state *state)
{
   bool progress = false;

   struct divergence_state then_state = *state;
   then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->then_list, &then_state);

   struct divergence_state else_state = *state;
   else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->else_list, &else_state);

   /* handle phis after the IF */
   nir_foreach_phi(phi, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
      if (state->first_visit)
         phi->def.divergent = false;
      progress |= visit_if_merge_phi(phi, if_stmt->condition.ssa->divergent);
   }

   /* join loop divergence information from both branch legs */
   state->divergent_loop_continue |= then_state.divergent_loop_continue ||
                                     else_state.divergent_loop_continue;
   state->divergent_loop_break |= then_state.divergent_loop_break ||
                                  else_state.divergent_loop_break;

   /* A divergent continue makes succeeding loop CF divergent:
    * not all loop-active invocations participate in the remaining loop body,
    * which means that a following break might be taken only by some
    * invocations. */
   state->divergent_loop_cf |= state->divergent_loop_continue;

   return progress;
}

static bool
visit_loop(nir_loop *loop, struct divergence_state *state)
{
   assert(!nir_loop_has_continue_construct(loop));
   bool progress = false;
   nir_block *loop_header = nir_loop_first_block(loop);
   nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);

   /* handle loop header phis first: we have no knowledge yet about
    * the loop's control flow or any loop-carried sources. */
   nir_foreach_phi(phi, loop_header) {
      if (!state->first_visit && phi->def.divergent)
         continue;

      nir_foreach_phi_src(src, phi) {
         if (src->pred == loop_preheader) {
            phi->def.divergent = src->src.ssa->divergent;
            break;
         }
      }
      progress |= phi->def.divergent;
   }

   /* setup loop state */
   struct divergence_state loop_state = *state;
   loop_state.divergent_loop_cf = false;
   loop_state.divergent_loop_continue = false;
   loop_state.divergent_loop_break = false;

   /* process loop body until no further changes are made */
   bool repeat;
   do {
      progress |= visit_cf_list(&loop->body, &loop_state);
      repeat = false;

      /* revisit loop header phis to see if something has changed */
      nir_foreach_phi(phi, loop_header) {
         repeat |= visit_loop_header_phi(phi, loop_preheader,
                                         loop_state.divergent_loop_continue);
      }

      loop_state.divergent_loop_cf = false;
      loop_state.first_visit = false;
   } while (repeat);

   /* handle phis after the loop */
   nir_foreach_phi(phi, nir_cf_node_cf_tree_next(&loop->cf_node)) {
      if (state->first_visit)
         phi->def.divergent = false;
      progress |= visit_loop_exit_phi(phi, loop_state.divergent_loop_break);
   }

   loop->divergent = (loop_state.divergent_loop_break || loop_state.divergent_loop_continue);

   return progress;
}

static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state)
{
   bool has_changed = false;

   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         has_changed |= visit_block(nir_cf_node_as_block(node), state);
         break;
      case nir_cf_node_if:
         has_changed |= visit_if(nir_cf_node_as_if(node), state);
         break;
      case nir_cf_node_loop:
         has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
         break;
      case nir_cf_node_function:
         unreachable("NIR divergence analysis: Unsupported cf_node type.");
      }
   }

   return has_changed;
}

void
nir_divergence_analysis(nir_shader *shader)
{
   shader->info.divergence_analysis_run = true;

   struct divergence_state state = {
      .stage = shader->info.stage,
      .shader = shader,
      .divergent_loop_cf = false,
      .divergent_loop_continue = false,
      .divergent_loop_break = false,
      .first_visit = true,
   };

   visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
}
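
/* Typical usage (illustrative sketch; exact pass ordering varies by driver):
 *
 *    NIR_PASS(_, shader, nir_convert_to_lcssa, true, true);
 *    nir_divergence_analysis(shader);
 *    ...
 *    if (def->divergent)
 *       keep_in_vgpr(def);   // hypothetical backend helper
 */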

/* Compute divergence between vertices of the same primitive. This uses
 * the same divergent field in nir_def and nir_loop as the regular divergence
 * pass.
 */
void
nir_vertex_divergence_analysis(nir_shader *shader)
{
   shader->info.divergence_analysis_run = false;

   struct divergence_state state = {
      .stage = shader->info.stage,
      .shader = shader,
      .vertex_divergence = true,
      .first_visit = true,
   };

   visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
}

bool
nir_update_instr_divergence(nir_shader *shader, nir_instr *instr)
{
   nir_foreach_def(instr, set_ssa_def_not_divergent, NULL);

   if (instr->type == nir_instr_type_phi) {
      nir_cf_node *prev = nir_cf_node_prev(&instr->block->cf_node);
      /* can only update gamma/if phis */
      if (!prev || prev->type != nir_cf_node_if)
         return false;

      nir_if *nif = nir_cf_node_as_if(prev);

      visit_if_merge_phi(nir_instr_as_phi(instr), nir_src_is_divergent(nif->condition));
      return true;
   }

   update_instr_divergence(shader, instr, false);
   return true;
}

bool
nir_has_divergent_loop(nir_shader *shader)
{
   bool divergent_loop = false;
   nir_function_impl *func = nir_shader_get_entrypoint(shader);

   foreach_list_typed(nir_cf_node, node, node, &func->body) {
      if (node->type == nir_cf_node_loop && nir_cf_node_as_loop(node)->divergent) {
         divergent_loop = true;
         break;
      }
   }

   return divergent_loop;
}