; RUN: llc -march=amdgcn -mcpu=gfx900 -print-after=si-annotate-control-flow %s -o /dev/null 2>&1 | FileCheck %s

; This test inspects the IR dumped after the si-annotate-control-flow pass
; for a kernel whose switch has an unreachable default block, followed by a
; single-block loop whose exit condition depends on a divergent phi. The
; FileCheck directives further down verify that the loop exit is annotated
; with the llvm.amdgcn.if.break / llvm.amdgcn.loop intrinsics.

target datalayout = "n32"

; CHECK-LABEL: @switch_unreachable_default

; The kernel loads through %p and stores through %gep_out, so it must not
; carry readnone; it uses attribute group #1 (nounwind only).
define amdgpu_kernel void @switch_unreachable_default(i32 addrspace(1)* %out, i8 addrspace(1)* %in0, i8 addrspace(1)* %in1) #1 {
centry:
  ; The work-item id makes the switch, and therefore the phi in sw.epilog,
  ; divergent.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  switch i32 %tid, label %sw.default [
    i32 0, label %sw.bb0
    i32 1, label %sw.bb1
  ]

sw.bb0:
  br label %sw.epilog

sw.bb1:
  br label %sw.epilog

sw.default:
  unreachable

sw.epilog:
  ; Selects the input pointer according to which switch case was taken.
  %ptr = phi i8 addrspace(1)* [%in0, %sw.bb0], [%in1, %sw.bb1]
  %gep_in = getelementptr inbounds i8, i8 addrspace(1)* %ptr, i64 0
  br label %sw.while

; The loop below is necessary to preserve the effect of the
; unreachable default on divergence analysis in the presence of other
; optimizations. The loop consists of a single block where the loop
; exit is divergent because it depends on the divergent phi at the
; start of the block. The checks below ensure that the loop exit is
; handled correctly as divergent. But the data-flow within the block
; is sensitive to optimizations; so we just ensure that the relevant
; operations in the block body are indeed in the same block.

; CHECK: [[PHI:%[a-zA-Z0-9._]+]] = phi i64
; CHECK-NOT: {{ br }}
; CHECK: load i8
; CHECK-NOT: {{ br }}
; CHECK: [[ICMP:%[a-zA-Z0-9._]+]] = icmp eq
; CHECK: [[IF:%[a-zA-Z0-9._]+]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[ICMP]], i64 [[PHI]])
; CHECK: [[LOOP:%[a-zA-Z0-9._]+]] = call i1 @llvm.amdgcn.loop.i64(i64 [[IF]])
; CHECK: br i1 [[LOOP]]

sw.while:
  ; Walks the byte string starting at %gep_in until a zero byte is loaded,
  ; counting the iterations in %count.
  %p = phi i8 addrspace(1)* [ %gep_in, %sw.epilog ], [ %incdec.ptr, %sw.while ]
  %count = phi i32 [ 0, %sw.epilog ], [ %count.inc, %sw.while ]
  %char = load i8, i8 addrspace(1)* %p, align 1
  %tobool = icmp eq i8 %char, 0
  %incdec.ptr = getelementptr inbounds i8, i8 addrspace(1)* %p, i64 1
  %count.inc = add i32 %count, 1
  br i1 %tobool, label %sw.exit, label %sw.while

sw.exit:
  ; Stores the count observed on the exiting iteration to %out[%tid].
  %tid64 = zext i32 %tid to i64
  %gep_out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid64
  store i32 %count, i32 addrspace(1)* %gep_out, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

; #0: for the work-item id intrinsic declaration only.
attributes #0 = { nounwind readnone }
; #1: for the kernel, which reads and writes global memory.
attributes #1 = { nounwind }