; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; When the mask of lanes wanting to exit the loop on this iteration is not
; obviously already masked by exec (in this case, the xor with -1 inserted by
; control flow annotation), lower control flow must insert an S_AND_B64
; with exec.

; GCN-LABEL: {{^}}needs_and:
; GCN: s_xor_b64 [[REG1:[^ ,]*]], {{[^ ,]*, -1$}}
; GCN: s_and_b64 [[REG2:[^ ,]*]], exec, [[REG1]]
; GCN: s_or_b64 [[REG3:[^ ,]*]], [[REG2]],
; GCN: s_andn2_b64 exec, exec, [[REG3]]

define void @needs_and(i32 %arg) {
entry:
  br label %loop

loop:
  %tmp23phi = phi i32 [ %tmp23, %endif ], [ 0, %entry ]
  %tmp23 = add nuw i32 %tmp23phi, 1
  %tmp27 = icmp ult i32 %arg, %tmp23
  br i1 %tmp27, label %then, label %endif

then:                                             ; preds = %loop
  call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i32 0)
  br label %endif

endif:                                            ; preds = %then, %loop
  br i1 %tmp27, label %loop, label %loopexit

loopexit:
  ret void
}

; When the mask of lanes wanting to exit the loop on this iteration is
; obviously already masked by exec (the result of a V_CMP), lower control flow
; can omit the S_AND_B64 and avoid an unnecessary instruction.

; GCN-LABEL: {{^}}doesnt_need_and:
; GCN: v_cmp{{[^ ]*}} [[REG1:[^ ,]*]]
; GCN: s_or_b64 [[REG2:[^ ,]*]], [[REG1]],
; GCN: s_andn2_b64 exec, exec, [[REG2]]

define void @doesnt_need_and(i32 %arg) {
entry:
  br label %loop

loop:
  %tmp23phi = phi i32 [ %tmp23, %loop ], [ 0, %entry ]
  %tmp23 = add nuw i32 %tmp23phi, 1
  %tmp27 = icmp ult i32 %arg, %tmp23
  call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i32 0)
  br i1 %tmp27, label %loop, label %loopexit

loopexit:
  ret void
}

; Another case where the mask of lanes wanting to exit the loop is not masked
; by exec, because it is a function parameter.

; GCN-LABEL: {{^}}break_cond_is_arg:
; GCN: s_xor_b64 [[REG1:[^ ,]*]], {{[^ ,]*, -1$}}
; GCN: s_andn2_b64 exec, exec, [[REG3:[^ ,]*]]
; GCN: s_and_b64 [[REG2:[^ ,]*]], exec, [[REG1]]
; GCN: s_or_b64 [[REG3]], [[REG2]],

define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
entry:
  br label %loop

loop:
  %tmp23phi = phi i32 [ %tmp23, %endif ], [ 0, %entry ]
  %tmp23 = add nuw i32 %tmp23phi, 1
  %tmp27 = icmp ult i32 %arg, %tmp23
  br i1 %tmp27, label %then, label %endif

then:                                             ; preds = %loop
  call void @llvm.amdgcn.raw.buffer.store.f32(float undef, <4 x i32> undef, i32 0, i32 undef, i32 0)
  br label %endif

endif:                                            ; preds = %then, %loop
  br i1 %breakcond, label %loop, label %loopexit

loopexit:
  ret void
}

declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #0

attributes #0 = { nounwind writeonly }