/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "nir.h"

/* This pass computes, for each SSA definition, whether it is uniform,
 * i.e. whether it has the same value for all invocations of the group.
 *
 * This divergence analysis pass expects the shader to be in LCSSA form.
 *
 * The algorithm implements "The Simple Divergence Analysis" from
 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
 * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS),
 * ACM, 2013, 35 (4), pp. 13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
 */
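
/* An illustrative example (not part of the original comment): in a fragment
 * shader, a load_push_constant is uniform because every invocation reads the
 * same bytes, while load_frag_coord is divergent because each invocation
 * shades a different pixel. The pass derives such facts by forward-propagating
 * divergence from the sources of each instruction to its definition. */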

struct divergence_state {
   const gl_shader_stage stage;
   nir_shader *shader;

   /** current control flow state */
   /* True if some loop-active invocations might take a different control-flow path.
    * A divergent break does not cause subsequent control flow to be considered
    * divergent because those invocations are no longer active in the loop.
    * For a divergent if, both sides are considered divergent flow because
    * the other side is still loop-active. */
   bool divergent_loop_cf;
   /* True if a divergent continue happened since the loop header */
   bool divergent_loop_continue;
   /* True if a divergent break happened since the loop header */
   bool divergent_loop_break;

   /* True if we visit the block for the first time */
   bool first_visit;
};

static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state);

static bool
visit_alu(nir_alu_instr *instr)
{
   if (instr->dest.dest.ssa.divergent)
      return false;

   unsigned num_src = nir_op_infos[instr->op].num_inputs;

   for (unsigned i = 0; i < num_src; i++) {
      if (instr->src[i].src.ssa->divergent) {
         instr->dest.dest.ssa.divergent = true;
         return true;
      }
   }

   return false;
}

static bool
visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
{
   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
      return false;

   if (instr->dest.ssa.divergent)
      return false;

   nir_divergence_options options = shader->options->divergence_analysis_options;
   gl_shader_stage stage = shader->info.stage;
   bool is_divergent = false;
   switch (instr->intrinsic) {
   /* Intrinsics which are always uniform */
   case nir_intrinsic_shader_clock:
   case nir_intrinsic_ballot:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_num_subgroups:
   case nir_intrinsic_load_ray_launch_size:
   case nir_intrinsic_load_ray_launch_size_addr_amd:
   case nir_intrinsic_load_sbt_base_amd:
   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_first_invocation:
   case nir_intrinsic_last_invocation:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_viewport_scale:
   case nir_intrinsic_load_user_clip_plane:
   case nir_intrinsic_load_viewport_x_scale:
   case nir_intrinsic_load_viewport_y_scale:
   case nir_intrinsic_load_viewport_z_scale:
   case nir_intrinsic_load_viewport_offset:
   case nir_intrinsic_load_viewport_x_offset:
   case nir_intrinsic_load_viewport_y_offset:
   case nir_intrinsic_load_viewport_z_offset:
   case nir_intrinsic_load_blend_const_color_a_float:
   case nir_intrinsic_load_blend_const_color_b_float:
   case nir_intrinsic_load_blend_const_color_g_float:
   case nir_intrinsic_load_blend_const_color_r_float:
   case nir_intrinsic_load_blend_const_color_rgba:
   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
   case nir_intrinsic_load_line_width:
   case nir_intrinsic_load_aa_line_width:
   case nir_intrinsic_load_xfb_address:
   case nir_intrinsic_load_num_vertices:
   case nir_intrinsic_load_fb_layers_v3d:
   case nir_intrinsic_load_tcs_num_patches_amd:
   case nir_intrinsic_load_ring_tess_factors_amd:
   case nir_intrinsic_load_ring_tess_offchip_amd:
   case nir_intrinsic_load_ring_tess_factors_offset_amd:
   case nir_intrinsic_load_ring_tess_offchip_offset_amd:
   case nir_intrinsic_load_ring_mesh_scratch_amd:
   case nir_intrinsic_load_ring_mesh_scratch_offset_amd:
   case nir_intrinsic_load_ring_esgs_amd:
   case nir_intrinsic_load_ring_es2gs_offset_amd:
   case nir_intrinsic_load_ring_task_draw_amd:
   case nir_intrinsic_load_ring_task_payload_amd:
   case nir_intrinsic_load_task_ring_entry_amd:
   case nir_intrinsic_load_task_ib_addr:
   case nir_intrinsic_load_task_ib_stride:
   case nir_intrinsic_load_sample_positions_pan:
   case nir_intrinsic_load_workgroup_num_input_vertices_amd:
   case nir_intrinsic_load_workgroup_num_input_primitives_amd:
   case nir_intrinsic_load_shader_query_enabled_amd:
   case nir_intrinsic_load_cull_front_face_enabled_amd:
   case nir_intrinsic_load_cull_back_face_enabled_amd:
   case nir_intrinsic_load_cull_ccw_amd:
   case nir_intrinsic_load_cull_small_primitives_enabled_amd:
   case nir_intrinsic_load_cull_any_enabled_amd:
   case nir_intrinsic_load_cull_small_prim_precision_amd:
   case nir_intrinsic_load_user_data_amd:
   case nir_intrinsic_load_force_vrs_rates_amd:
   case nir_intrinsic_load_tess_level_inner_default:
   case nir_intrinsic_load_tess_level_outer_default:
   case nir_intrinsic_load_scalar_arg_amd:
   case nir_intrinsic_load_smem_amd:
   case nir_intrinsic_load_global_const_block_intel:
   case nir_intrinsic_load_reloc_const_intel:
   case nir_intrinsic_load_global_block_intel:
   case nir_intrinsic_load_btd_global_arg_addr_intel:
   case nir_intrinsic_load_btd_local_arg_addr_intel:
   case nir_intrinsic_load_mesh_inline_data_intel:
   case nir_intrinsic_load_ray_num_dss_rt_stacks_intel:
   case nir_intrinsic_load_lshs_vertex_stride_amd:
   case nir_intrinsic_load_hs_out_patch_data_offset_amd:
      is_divergent = false;
      break;

   /* Intrinsics with divergence depending on shader stage and hardware */
   case nir_intrinsic_load_frag_shading_rate:
      is_divergent = !(options & nir_divergence_single_frag_shading_rate_per_subgroup);
      break;
   case nir_intrinsic_load_input:
      is_divergent = instr->src[0].ssa->divergent;
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else if (stage != MESA_SHADER_MESH)
         is_divergent = true;
      break;
   case nir_intrinsic_load_per_vertex_input:
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent;
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_input_vertex:
      is_divergent = instr->src[1].ssa->divergent;
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_output:
      is_divergent = instr->src[0].ssa->divergent;
      switch (stage) {
      case MESA_SHADER_TESS_CTRL:
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
         break;
      case MESA_SHADER_FRAGMENT:
         is_divergent = true;
         break;
      case MESA_SHADER_TASK:
      case MESA_SHADER_MESH:
         /* Divergent if src[0] is, so nothing else to do. */
         break;
      default:
         unreachable("Invalid stage for load_output");
      }
      break;
   case nir_intrinsic_load_per_vertex_output:
      assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH);
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent ||
                     (stage == MESA_SHADER_TESS_CTRL &&
                      !(options & nir_divergence_single_patch_per_tcs_subgroup));
      break;
   case nir_intrinsic_load_per_primitive_output:
      assert(stage == MESA_SHADER_MESH);
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent;
      break;
   case nir_intrinsic_load_layer_id:
   case nir_intrinsic_load_front_face:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_view_index:
      assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
      if (options & nir_divergence_view_index_uniform)
         is_divergent = false;
      else if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_fs_input_interp_deltas:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = instr->src[0].ssa->divergent;
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_primitive_id:
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else if (stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_VERTEX)
         is_divergent = true;
      else
         unreachable("Invalid stage for load_primitive_id");
      break;
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer:
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         unreachable("Invalid stage for load_tess_level_*");
      break;
   case nir_intrinsic_load_patch_vertices_in:
      if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         assert(stage == MESA_SHADER_TESS_CTRL);
      break;

   case nir_intrinsic_load_workgroup_index:
   case nir_intrinsic_load_workgroup_id:
      assert(gl_shader_stage_uses_workgroup(stage));
      if (stage == MESA_SHADER_COMPUTE)
         is_divergent |= (options & nir_divergence_multiple_workgroup_per_compute_subgroup);
      break;

   /* Clustered reductions are uniform if cluster_size == subgroup_size
    * or if the source is uniform and the operation is invariant.
    * Inclusive scans are uniform only if the source is uniform and
    * the operation is invariant. */
   case nir_intrinsic_reduce:
      if (nir_intrinsic_cluster_size(instr) == 0)
         return false;
      FALLTHROUGH;
   case nir_intrinsic_inclusive_scan: {
      nir_op op = nir_intrinsic_reduction_op(instr);
      is_divergent = instr->src[0].ssa->divergent;
      if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
          op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
          op != nir_op_iand && op != nir_op_ior)
         is_divergent = true;
      break;
   }
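
   /* Illustrative note (not in the original source): a whole-subgroup
    * reduce (cluster_size == 0) is uniform even for a divergent source,
    * since every invocation receives the same combined result. For scans
    * the operation matters: an inclusive_scan of a uniform value x with
    * iand yields x in every lane and stays uniform, whereas with iadd the
    * per-lane prefix sums differ, so it must be treated as divergent. */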

   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent;
      break;

   case nir_intrinsic_get_ssbo_size:
   case nir_intrinsic_deref_buffer_array_length:
      is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
      break;

   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_sparse_load:
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_bindless_image_sparse_load:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent;
      break;

   /* Intrinsics with divergence depending on sources */
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_byte_permute_amd:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_shared2_amd:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_2x32:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_global_amd:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_sample_pos_from_id:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_load_task_payload:
   case nir_intrinsic_load_buffer_amd:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_vulkan_resource_index:
   case nir_intrinsic_vulkan_resource_reindex:
   case nir_intrinsic_load_vulkan_descriptor:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_read_deref:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd:
   case nir_intrinsic_is_sparse_texels_resident:
   case nir_intrinsic_sparse_residency_code_and:
   case nir_intrinsic_bvh64_intersect_ray_amd:
   case nir_intrinsic_image_deref_load_param_intel:
   case nir_intrinsic_image_load_raw_intel:
   case nir_intrinsic_get_ubo_size:
   case nir_intrinsic_load_ssbo_address:
   case nir_intrinsic_load_desc_set_address_intel: {
      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
      for (unsigned i = 0; i < num_srcs; i++) {
         if (instr->src[i].ssa->divergent) {
            is_divergent = true;
            break;
         }
      }
      break;
   }

   case nir_intrinsic_shuffle:
      is_divergent = instr->src[0].ssa->divergent &&
                     instr->src[1].ssa->divergent;
      break;
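
   /* Reader note (not in the original source): shuffle is uniform when
    * either source is uniform. Uniform data means every lane reads the
    * same value regardless of the selected lane; a uniform index means
    * all lanes read from the same lane, which again produces a single
    * value for the whole subgroup. Hence the && above. */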

   /* Intrinsics which are always divergent */
   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1:
   case nir_intrinsic_load_param:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_point_coord_maybe_flipped:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_sample:
   case nir_intrinsic_load_barycentric_model:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_vertex:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_line_coord:
   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_pos_or_center:
   case nir_intrinsic_load_vertex_id_zero_base:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_global_invocation_id_zero_base:
   case nir_intrinsic_load_global_invocation_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_is_helper_invocation:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fcomp_swap:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fadd:
   case nir_intrinsic_image_deref_atomic_fmin:
   case nir_intrinsic_image_deref_atomic_fmax:
   case nir_intrinsic_image_deref_atomic_inc_wrap:
   case nir_intrinsic_image_deref_atomic_dec_wrap:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_fadd:
   case nir_intrinsic_image_atomic_fmin:
   case nir_intrinsic_image_atomic_fmax:
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_fadd:
   case nir_intrinsic_bindless_image_atomic_fmin:
   case nir_intrinsic_bindless_image_atomic_fmax:
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
   case nir_intrinsic_shared_atomic_fadd:
   case nir_intrinsic_shared_atomic_fmin:
   case nir_intrinsic_shared_atomic_fmax:
   case nir_intrinsic_shared_atomic_fcomp_swap:
   case nir_intrinsic_task_payload_atomic_add:
   case nir_intrinsic_task_payload_atomic_imin:
   case nir_intrinsic_task_payload_atomic_umin:
   case nir_intrinsic_task_payload_atomic_imax:
   case nir_intrinsic_task_payload_atomic_umax:
   case nir_intrinsic_task_payload_atomic_and:
   case nir_intrinsic_task_payload_atomic_or:
   case nir_intrinsic_task_payload_atomic_xor:
   case nir_intrinsic_task_payload_atomic_exchange:
   case nir_intrinsic_task_payload_atomic_comp_swap:
   case nir_intrinsic_task_payload_atomic_fadd:
   case nir_intrinsic_task_payload_atomic_fmin:
   case nir_intrinsic_task_payload_atomic_fmax:
   case nir_intrinsic_task_payload_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_global_atomic_fmin:
   case nir_intrinsic_global_atomic_fmax:
   case nir_intrinsic_global_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_add_amd:
   case nir_intrinsic_global_atomic_imin_amd:
   case nir_intrinsic_global_atomic_umin_amd:
   case nir_intrinsic_global_atomic_imax_amd:
   case nir_intrinsic_global_atomic_umax_amd:
   case nir_intrinsic_global_atomic_and_amd:
   case nir_intrinsic_global_atomic_or_amd:
   case nir_intrinsic_global_atomic_xor_amd:
   case nir_intrinsic_global_atomic_exchange_amd:
   case nir_intrinsic_global_atomic_comp_swap_amd:
   case nir_intrinsic_global_atomic_fadd_amd:
   case nir_intrinsic_global_atomic_fmin_amd:
   case nir_intrinsic_global_atomic_fmax_amd:
   case nir_intrinsic_global_atomic_fcomp_swap_amd:
   case nir_intrinsic_global_atomic_add_2x32:
   case nir_intrinsic_global_atomic_imin_2x32:
   case nir_intrinsic_global_atomic_umin_2x32:
   case nir_intrinsic_global_atomic_imax_2x32:
   case nir_intrinsic_global_atomic_umax_2x32:
   case nir_intrinsic_global_atomic_and_2x32:
   case nir_intrinsic_global_atomic_or_2x32:
   case nir_intrinsic_global_atomic_xor_2x32:
   case nir_intrinsic_global_atomic_exchange_2x32:
   case nir_intrinsic_global_atomic_comp_swap_2x32:
   case nir_intrinsic_global_atomic_fadd_2x32:
   case nir_intrinsic_global_atomic_fmin_2x32:
   case nir_intrinsic_global_atomic_fmax_2x32:
   case nir_intrinsic_global_atomic_fcomp_swap_2x32:
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_add_deref:
   case nir_intrinsic_atomic_counter_min_deref:
   case nir_intrinsic_atomic_counter_max_deref:
   case nir_intrinsic_atomic_counter_and_deref:
   case nir_intrinsic_atomic_counter_or_deref:
   case nir_intrinsic_atomic_counter_xor_deref:
   case nir_intrinsic_atomic_counter_inc_deref:
   case nir_intrinsic_atomic_counter_pre_dec_deref:
   case nir_intrinsic_atomic_counter_post_dec_deref:
   case nir_intrinsic_atomic_counter_exchange_deref:
   case nir_intrinsic_atomic_counter_comp_swap_deref:
   case nir_intrinsic_exclusive_scan:
   case nir_intrinsic_ballot_bit_count_exclusive:
   case nir_intrinsic_ballot_bit_count_inclusive:
   case nir_intrinsic_write_invocation_amd:
   case nir_intrinsic_mbcnt_amd:
   case nir_intrinsic_lane_permute_16_amd:
   case nir_intrinsic_elect:
   case nir_intrinsic_load_tlb_color_v3d:
   case nir_intrinsic_load_tess_rel_patch_id_amd:
   case nir_intrinsic_load_gs_vertex_offset_amd:
   case nir_intrinsic_has_input_vertex_amd:
   case nir_intrinsic_has_input_primitive_amd:
   case nir_intrinsic_load_packed_passthrough_primitive_amd:
   case nir_intrinsic_load_initial_edgeflags_amd:
   case nir_intrinsic_gds_atomic_add_amd:
   case nir_intrinsic_load_rt_arg_scratch_offset_amd:
   case nir_intrinsic_load_intersection_opaque_amd:
   case nir_intrinsic_load_vector_arg_amd:
   case nir_intrinsic_load_btd_stack_id_intel:
   case nir_intrinsic_load_topology_id_intel:
   case nir_intrinsic_load_scratch_base_ptr:
      is_divergent = true;
      break;

   default:
#ifdef NDEBUG
      is_divergent = true;
      break;
#else
      nir_print_instr(&instr->instr, stderr);
      unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
#endif
   }

   instr->dest.ssa.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_tex(nir_tex_instr *instr)
{
   if (instr->dest.ssa.divergent)
      return false;

   bool is_divergent = false;

   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_sampler_deref:
      case nir_tex_src_sampler_handle:
      case nir_tex_src_sampler_offset:
         is_divergent |= instr->src[i].src.ssa->divergent &&
                         instr->sampler_non_uniform;
         break;
      case nir_tex_src_texture_deref:
      case nir_tex_src_texture_handle:
      case nir_tex_src_texture_offset:
         is_divergent |= instr->src[i].src.ssa->divergent &&
                         instr->texture_non_uniform;
         break;
      default:
         is_divergent |= instr->src[i].src.ssa->divergent;
         break;
      }
   }

   instr->dest.ssa.divergent = is_divergent;
   return is_divergent;
}
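
/* Side note (not in the original source): texture_non_uniform and
 * sampler_non_uniform are typically set for NonUniform-decorated accesses.
 * Without them, the API requires the texture/sampler handle to be
 * dynamically uniform, so a divergent-looking handle source does not make
 * the result divergent here. */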

static bool
visit_load_const(nir_load_const_instr *instr)
{
   return false;
}

static bool
visit_ssa_undef(nir_ssa_undef_instr *instr)
{
   return false;
}

static bool
nir_variable_mode_is_uniform(nir_variable_mode mode) {
   switch (mode) {
   case nir_var_uniform:
   case nir_var_mem_ubo:
   case nir_var_mem_ssbo:
   case nir_var_mem_shared:
   case nir_var_mem_task_payload:
   case nir_var_mem_global:
   case nir_var_image:
      return true;
   default:
      return false;
   }
}

static bool
nir_variable_is_uniform(nir_shader *shader, nir_variable *var)
{
   if (nir_variable_mode_is_uniform(var->data.mode))
      return true;

   nir_divergence_options options = shader->options->divergence_analysis_options;
   gl_shader_stage stage = shader->info.stage;

   if (stage == MESA_SHADER_FRAGMENT &&
       (options & nir_divergence_single_prim_per_subgroup) &&
       var->data.mode == nir_var_shader_in &&
       var->data.interpolation == INTERP_MODE_FLAT)
      return true;

   if (stage == MESA_SHADER_TESS_CTRL &&
       (options & nir_divergence_single_patch_per_tcs_subgroup) &&
       var->data.mode == nir_var_shader_out && var->data.patch)
      return true;

   if (stage == MESA_SHADER_TESS_EVAL &&
       (options & nir_divergence_single_patch_per_tes_subgroup) &&
       var->data.mode == nir_var_shader_in && var->data.patch)
      return true;

   return false;
}

static bool
visit_deref(nir_shader *shader, nir_deref_instr *deref)
{
   if (deref->dest.ssa.divergent)
      return false;

   bool is_divergent = false;
   switch (deref->deref_type) {
   case nir_deref_type_var:
      is_divergent = !nir_variable_is_uniform(shader, deref->var);
      break;
   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      is_divergent = deref->arr.index.ssa->divergent;
      FALLTHROUGH;
   case nir_deref_type_struct:
   case nir_deref_type_array_wildcard:
      is_divergent |= deref->parent.ssa->divergent;
      break;
   case nir_deref_type_cast:
      /* cast derefs have no variable, so check the mode bitmask instead */
      is_divergent = !nir_variable_mode_is_uniform(deref->modes) ||
                     deref->parent.ssa->divergent;
      break;
   }

   deref->dest.ssa.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_jump(nir_jump_instr *jump, struct divergence_state *state)
{
   switch (jump->type) {
   case nir_jump_continue:
      if (state->divergent_loop_continue)
         return false;
      if (state->divergent_loop_cf)
         state->divergent_loop_continue = true;
      return state->divergent_loop_continue;
   case nir_jump_break:
      if (state->divergent_loop_break)
         return false;
      if (state->divergent_loop_cf)
         state->divergent_loop_break = true;
      return state->divergent_loop_break;
   case nir_jump_halt:
      /* This totally kills invocations, so it doesn't add divergence */
      break;
   case nir_jump_return:
      unreachable("NIR divergence analysis: Unsupported return instruction.");
      break;
   case nir_jump_goto:
   case nir_jump_goto_if:
      unreachable("NIR divergence analysis: Unsupported goto/goto_if instruction.");
      break;
   }
   return false;
}

static bool
set_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
{
   def->divergent = false;
   return true;
}

static bool
update_instr_divergence(nir_shader *shader, nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return visit_alu(nir_instr_as_alu(instr));
   case nir_instr_type_intrinsic:
      return visit_intrinsic(shader, nir_instr_as_intrinsic(instr));
   case nir_instr_type_tex:
      return visit_tex(nir_instr_as_tex(instr));
   case nir_instr_type_load_const:
      return visit_load_const(nir_instr_as_load_const(instr));
   case nir_instr_type_ssa_undef:
      return visit_ssa_undef(nir_instr_as_ssa_undef(instr));
   case nir_instr_type_deref:
      return visit_deref(shader, nir_instr_as_deref(instr));
   case nir_instr_type_jump:
   case nir_instr_type_phi:
   case nir_instr_type_call:
   case nir_instr_type_parallel_copy:
   default:
      unreachable("NIR divergence analysis: Unsupported instruction type.");
   }
}

static bool
visit_block(nir_block *block, struct divergence_state *state)
{
   bool has_changed = false;

   nir_foreach_instr(instr, block) {
      /* phis are handled when processing the branches */
      if (instr->type == nir_instr_type_phi)
         continue;

      if (state->first_visit)
         nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);

      if (instr->type == nir_instr_type_jump)
         has_changed |= visit_jump(nir_instr_as_jump(instr), state);
      else
         has_changed |= update_instr_divergence(state->shader, instr);
   }

   return has_changed;
}

/* There are 3 types of phi instructions:
 * (1) gamma: represents the joining point of different paths
 *     created by an "if-then-else" branch.
 *     The resulting value is divergent if the branch condition
 *     or any of the source values is divergent. */
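
/* For illustration (not part of the original comment): given
 *
 *    if (cond) { a1 = 1; } else { a2 = 2; }
 *    a3 = phi(a1, a2)
 *
 * a3 is divergent whenever cond is divergent, even though both sources are
 * uniform constants, because different invocations arrive from different
 * legs of the if. */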
static bool
visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
{
   if (phi->dest.ssa.divergent)
      return false;

   unsigned defined_srcs = 0;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
      if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) {
         defined_srcs++;
      }
   }

   /* if the condition is divergent and at least two sources are defined,
    * the resulting value is divergent */
   if (defined_srcs > 1 && if_cond_divergent) {
      phi->dest.ssa.divergent = true;
      return true;
   }

   return false;
}

/* There are 3 types of phi instructions:
 * (2) mu: these exist only at loop headers and
 *     merge the initial and loop-carried values.
 *     The resulting value is divergent if any source value
 *     is divergent or a divergent loop continue condition
 *     is associated with a different ssa-def. */
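
/* For illustration (not part of the original comment): in
 *
 *    i = 0;
 *    loop {
 *       i2 = phi(i, i3, i4);   // header phi
 *       ...
 *       if (div_cond) { i3 = i2; continue; }
 *       i4 = i2 + 1;
 *    }
 *
 * the header phi merges two different loop-carried defs (i3 and i4) under a
 * divergent continue, so i2 must be marked divergent even though every
 * individual source is uniform. */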

static bool
visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
{
   if (phi->dest.ssa.divergent)
      return false;

   nir_ssa_def *same = NULL;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
      /* if this loop is uniform, we're done here */
      if (!divergent_continue)
         continue;
      /* skip the loop preheader */
      if (src->pred == preheader)
         continue;
      /* skip undef values */
      if (nir_src_is_undef(src->src))
         continue;

      /* check if all loop-carried values are from the same ssa-def */
      if (!same)
         same = src->src.ssa;
      else if (same != src->src.ssa) {
         phi->dest.ssa.divergent = true;
         return true;
      }
   }

   return false;
}

/* There are 3 types of phi instructions:
 * (3) eta: represents values that leave a loop.
 *     The resulting value is divergent if the source value is divergent
 *     or any loop exit condition is divergent for a value which is
 *     not loop-invariant.
 *     (note: there should be no phi for loop-invariant variables.) */
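
/* For illustration (not part of the original comment): if some invocations
 * leave the loop via "if (div_cond) break;" in iteration 1 while others
 * leave in iteration 2, an eta phi for the loop counter observes a different
 * value per invocation; this is why any divergent break makes all loop-exit
 * phis divergent. */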
static bool
visit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
{
   if (phi->dest.ssa.divergent)
      return false;

   if (divergent_break) {
      phi->dest.ssa.divergent = true;
      return true;
   }

   /* if any source value is divergent, the resulting value is divergent */
   nir_foreach_phi_src(src, phi) {
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
   }

   return false;
}

static bool
visit_if(nir_if *if_stmt, struct divergence_state *state)
{
   bool progress = false;

   struct divergence_state then_state = *state;
   then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->then_list, &then_state);

   struct divergence_state else_state = *state;
   else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->else_list, &else_state);

   /* handle phis after the IF */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      if (state->first_visit)
         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
      progress |= visit_if_merge_phi(nir_instr_as_phi(instr),
                                     if_stmt->condition.ssa->divergent);
   }

   /* join loop divergence information from both branch legs */
   state->divergent_loop_continue |= then_state.divergent_loop_continue ||
                                     else_state.divergent_loop_continue;
   state->divergent_loop_break |= then_state.divergent_loop_break ||
                                  else_state.divergent_loop_break;

   /* A divergent continue makes succeeding loop CF divergent:
    * not all loop-active invocations participate in the remaining loop body,
    * which means that a following break might be taken by only some
    * invocations. */
   state->divergent_loop_cf |= state->divergent_loop_continue;

   return progress;
}

static bool
visit_loop(nir_loop *loop, struct divergence_state *state)
{
   bool progress = false;
   nir_block *loop_header = nir_loop_first_block(loop);
   nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);

   /* handle loop header phis first: we have no knowledge yet about
    * the loop's control flow or any loop-carried sources. */
   nir_foreach_instr(instr, loop_header) {
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      if (!state->first_visit && phi->dest.ssa.divergent)
         continue;

      nir_foreach_phi_src(src, phi) {
         if (src->pred == loop_preheader) {
            phi->dest.ssa.divergent = src->src.ssa->divergent;
            break;
         }
      }
      progress |= phi->dest.ssa.divergent;
   }

   /* setup loop state */
   struct divergence_state loop_state = *state;
   loop_state.divergent_loop_cf = false;
   loop_state.divergent_loop_continue = false;
   loop_state.divergent_loop_break = false;

   /* process loop body until no further changes are made */
   bool repeat;
   do {
      progress |= visit_cf_list(&loop->body, &loop_state);
      repeat = false;

      /* revisit loop header phis to see if something has changed */
      nir_foreach_instr(instr, loop_header) {
         if (instr->type != nir_instr_type_phi)
            break;

         repeat |= visit_loop_header_phi(nir_instr_as_phi(instr),
                                         loop_preheader,
                                         loop_state.divergent_loop_continue);
      }

      loop_state.divergent_loop_cf = false;
      loop_state.first_visit = false;
   } while (repeat);
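
   /* Reader note (not in the original source): the fixed-point iteration
    * above is needed because divergence discovered late in the loop body can
    * flow back into a header phi. A header phi may look uniform on the first
    * pass and only become divergent once a divergent continue has been
    * found, which in turn can make more of the body divergent. */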

   /* handle phis after the loop */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      if (state->first_visit)
         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
      progress |= visit_loop_exit_phi(nir_instr_as_phi(instr),
                                      loop_state.divergent_loop_break);
   }

   loop->divergent = (loop_state.divergent_loop_break || loop_state.divergent_loop_continue);

   return progress;
}

static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state)
{
   bool has_changed = false;

   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         has_changed |= visit_block(nir_cf_node_as_block(node), state);
         break;
      case nir_cf_node_if:
         has_changed |= visit_if(nir_cf_node_as_if(node), state);
         break;
      case nir_cf_node_loop:
         has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
         break;
      case nir_cf_node_function:
         unreachable("NIR divergence analysis: Unsupported cf_node type.");
      }
   }

   return has_changed;
}

void
nir_divergence_analysis(nir_shader *shader)
{
   shader->info.divergence_analysis_run = true;

   struct divergence_state state = {
      .stage = shader->info.stage,
      .shader = shader,
      .divergent_loop_cf = false,
      .divergent_loop_continue = false,
      .divergent_loop_break = false,
      .first_visit = true,
   };

   visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
}

bool
nir_update_instr_divergence(nir_shader *shader, nir_instr *instr)
{
   nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);

   if (instr->type == nir_instr_type_phi) {
      nir_cf_node *prev = nir_cf_node_prev(&instr->block->cf_node);
      /* can only update gamma/if phis */
      if (!prev || prev->type != nir_cf_node_if)
         return false;

      nir_if *nif = nir_cf_node_as_if(prev);

      visit_if_merge_phi(nir_instr_as_phi(instr), nir_src_is_divergent(nif->condition));
      return true;
   }

   update_instr_divergence(shader, instr);
   return true;
}


bool
nir_has_divergent_loop(nir_shader *shader)
{
   bool divergent_loop = false;
   nir_function_impl *func = nir_shader_get_entrypoint(shader);

   foreach_list_typed(nir_cf_node, node, node, &func->body) {
      if (node->type == nir_cf_node_loop && nir_cf_node_as_loop(node)->divergent) {
         divergent_loop = true;
         break;
      }
   }

   return divergent_loop;
}