; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py

; NOTE: The checks for opt are NOT added by the update script. Those
;       checks are looking for the absence of specific metadata, which
;       cannot be expressed reliably by the generated checks.

; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefix=ISA
; RUN: opt --amdgpu-annotate-uniform -S %s | FileCheck %s -check-prefix=UNIFORM
; RUN: opt --amdgpu-annotate-uniform --si-annotate-control-flow -S %s | FileCheck %s -check-prefix=CONTROLFLOW

; This module creates a divergent branch in block Flow2. The branch is
; marked as divergent by the divergence analysis but the condition is
; not. This test ensures that the divergence of the branch is tested,
; not its condition, so that branch is correctly emitted as divergent.
;
; Shape of the input, as written below:
;   * The branch in Flow2 tests %8, a phi in block Flow whose incoming
;     values are the constants false (from Flow1) and true (from loop).
;   * The loop is left either from block loop, once the counter %v1
;     exceeds 31, or through endif1/Flow1 when the fcmp on %v0 holds.
;     %v0 comes from llvm.amdgcn.interp.p1 and lives in a VGPR in the
;     expected llc output.
;
; Three things are verified:
;   * llc output: Flow2 is lowered with exec masking (s_and_saveexec_b64)
;     rather than a scalar branch.
;   * opt with the annotate-uniform pass alone: the Flow2 branch carries
;     no !amdgpu.uniform metadata.
;   * opt with si-annotate-control-flow added: the Flow2 branch is rewritten
;     to use the llvm.amdgcn.if / llvm.amdgcn.end.cf intrinsics.

target triple = "amdgcn-mesa-mesa3d"

define amdgpu_ps void @main(i32 %0, float %1) {
; ISA-LABEL: main:
; ISA:       ; %bb.0: ; %start
; ISA-NEXT:    v_readfirstlane_b32 s0, v0
; ISA-NEXT:    s_mov_b32 m0, s0
; ISA-NEXT:    s_mov_b32 s0, 0
; ISA-NEXT:    v_interp_p1_f32_e32 v0, v1, attr0.x
; ISA-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
; ISA-NEXT:    s_mov_b64 s[2:3], 0
; ISA-NEXT:    ; implicit-def: $sgpr6_sgpr7
; ISA-NEXT:    ; implicit-def: $sgpr4_sgpr5
; ISA-NEXT:    s_branch BB0_3
; ISA-NEXT:  BB0_1: ; %Flow1
; ISA-NEXT:    ; in Loop: Header=BB0_3 Depth=1
; ISA-NEXT:    s_or_b64 exec, exec, s[8:9]
; ISA-NEXT:    s_add_i32 s0, s0, 1
; ISA-NEXT:    s_mov_b64 s[8:9], 0
; ISA-NEXT:  BB0_2: ; %Flow
; ISA-NEXT:    ; in Loop: Header=BB0_3 Depth=1
; ISA-NEXT:    s_and_b64 s[10:11], exec, s[6:7]
; ISA-NEXT:    s_or_b64 s[2:3], s[10:11], s[2:3]
; ISA-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; ISA-NEXT:    s_and_b64 s[8:9], s[8:9], exec
; ISA-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; ISA-NEXT:    s_andn2_b64 exec, exec, s[2:3]
; ISA-NEXT:    s_cbranch_execz BB0_6
; ISA-NEXT:  BB0_3: ; %loop
; ISA-NEXT:    ; =>This Inner Loop Header: Depth=1
; ISA-NEXT:    s_or_b64 s[6:7], s[6:7], exec
; ISA-NEXT:    s_cmp_lt_u32 s0, 32
; ISA-NEXT:    s_mov_b64 s[8:9], -1
; ISA-NEXT:    s_cbranch_scc0 BB0_2
; ISA-NEXT:  ; %bb.4: ; %endif1
; ISA-NEXT:    ; in Loop: Header=BB0_3 Depth=1
; ISA-NEXT:    s_mov_b64 s[6:7], -1
; ISA-NEXT:    s_and_saveexec_b64 s[8:9], vcc
; ISA-NEXT:    s_cbranch_execz BB0_1
; ISA-NEXT:  ; %bb.5: ; %endif2
; ISA-NEXT:    ; in Loop: Header=BB0_3 Depth=1
; ISA-NEXT:    s_xor_b64 s[6:7], exec, -1
; ISA-NEXT:    s_branch BB0_1
; ISA-NEXT:  BB0_6: ; %Flow2
; ISA-NEXT:    s_or_b64 exec, exec, s[2:3]
; ISA-NEXT:    v_mov_b32_e32 v1, 0
; ISA-NEXT:    s_and_saveexec_b64 s[0:1], s[4:5]
; ISA-NEXT:  ; %bb.7: ; %if1
; ISA-NEXT:    v_sqrt_f32_e32 v1, v0
; ISA-NEXT:  ; %bb.8: ; %endloop
; ISA-NEXT:    s_or_b64 exec, exec, s[0:1]
; ISA-NEXT:    exp mrt0 v1, v1, v1, v1 done vm
; ISA-NEXT:    s_endpgm
start:
  %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
  br label %loop

; Counter test: once %v1 is above 31 control goes straight to Flow.
loop:                                             ; preds = %Flow, %start
  %v1 = phi i32 [ 0, %start ], [ %6, %Flow ]
  %v2 = icmp ugt i32 %v1, 31
  %2 = xor i1 %v2, true
  br i1 %2, label %endif1, label %Flow

Flow1:                                            ; preds = %endif2, %endif1
  %3 = phi i32 [ %v5, %endif2 ], [ undef, %endif1 ]
  %4 = phi i1 [ false, %endif2 ], [ true, %endif1 ]
  br label %Flow

; Hand-written checks (not regenerated by the update script): the branch that
; ends Flow2 must not be tagged with !amdgpu.uniform.

; UNIFORM-LABEL: Flow2:
; UNIFORM-NEXT: br i1 %8, label %if1, label %endloop
; UNIFORM-NOT: !amdgpu.uniform
; UNIFORM: if1:

; Hand-written checks: with control-flow annotation enabled, Flow2 closes the
; loop's control flow and re-opens a conditional region through the intrinsics.

; CONTROLFLOW-LABEL: Flow2:
; CONTROLFLOW-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %{{.*}})
; CONTROLFLOW-NEXT: [[IF:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %{{.*}})
; CONTROLFLOW-NEXT: [[COND:%.*]] = extractvalue { i1, i64 } [[IF]], 0
; CONTROLFLOW-NEXT: %{{.*}} = extractvalue { i1, i64 } [[IF]], 1
; CONTROLFLOW-NEXT: br i1 [[COND]], label %if1, label %endloop

Flow2:                                            ; preds = %Flow
  br i1 %8, label %if1, label %endloop

if1:                                              ; preds = %Flow2
  %v3 = call float @llvm.sqrt.f32(float %v0)
  br label %endloop

endif1:                                           ; preds = %loop
  %v4 = fcmp ogt float %v0, 0.000000e+00
  %5 = xor i1 %v4, true
  br i1 %5, label %endif2, label %Flow1

; %7 decides whether the loop is left. %8 is true only when Flow was reached
; directly from loop, and it is the condition tested by the branch in Flow2.
Flow:                                             ; preds = %Flow1, %loop
  %6 = phi i32 [ %3, %Flow1 ], [ undef, %loop ]
  %7 = phi i1 [ %4, %Flow1 ], [ true, %loop ]
  %8 = phi i1 [ false, %Flow1 ], [ true, %loop ]
  br i1 %7, label %Flow2, label %loop

endif2:                                           ; preds = %endif1
  %v5 = add i32 %v1, 1
  br label %Flow1

endloop:                                          ; preds = %if1, %Flow2
  %v6 = phi float [ 0.000000e+00, %Flow2 ], [ %v3, %if1 ]
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %v6, float %v6, float %v6, float %v6, i1 true, i1 true)
  ret void
}

; Function Attrs: nounwind readnone speculatable willreturn
declare float @llvm.sqrt.f32(float) #0

; Function Attrs: nounwind readnone speculatable
declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #1

; Function Attrs: inaccessiblememonly nounwind writeonly
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #2

attributes #0 = { nounwind readnone speculatable willreturn }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { inaccessiblememonly nounwind writeonly }