; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-lds < %s | FileCheck -check-prefix=NOLDS %s

; Tests for pointer comparisons (icmp) whose operands are derived from an
; alloca that -amdgpu-promote-alloca may replace with an addrspace(3) array.
;
; Normally instcombine would already have rewritten these into comparisons
; of the GEP indices.

; With -disable-promote-alloca-to-lds no addrspace(3) value may appear anywhere
; in the output.
; NOLDS-NOT: addrspace(3)

; Both compare operands are GEPs into the same alloca. The expected output
; replaces the alloca with a [256 x [16 x i32]] addrspace(3) global and
; performs the comparison on addrspace(3) pointers.
; CHECK-LABEL: @lds_promoted_alloca_icmp_same_derived_pointer(
; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_same_derived_pointer.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %b
; CHECK: %cmp = icmp eq i32 addrspace(3)* %ptr0, %ptr1
define amdgpu_kernel void @lds_promoted_alloca_icmp_same_derived_pointer(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %alloca = alloca [16 x i32], align 4
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
  %cmp = icmp eq i32* %ptr0, %ptr1
  ; Widen the i1 result and store it so the comparison has a user.
  %zext = zext i1 %cmp to i32
  store volatile i32 %zext, i32 addrspace(1)* %out
  ret void
}

; The alloca-derived pointer is compared against null on the right-hand side.
; The expected output still replaces the alloca, and the null operand takes
; the addrspace(3) pointer type.
; CHECK-LABEL: @lds_promoted_alloca_icmp_null_rhs(
; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_null_rhs.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
; CHECK: %cmp = icmp eq i32 addrspace(3)* %ptr0, null
define amdgpu_kernel void @lds_promoted_alloca_icmp_null_rhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %alloca = alloca [16 x i32], align 4
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
  %cmp = icmp eq i32* %ptr0, null
  %zext = zext i1 %cmp to i32
  store volatile i32 %zext, i32 addrspace(1)* %out
  ret void
}

; Same as the previous case, with null as the left-hand operand.
; CHECK-LABEL: @lds_promoted_alloca_icmp_null_lhs(
; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_icmp_null_lhs.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
; CHECK: %cmp = icmp eq i32 addrspace(3)* null, %ptr0
define amdgpu_kernel void @lds_promoted_alloca_icmp_null_lhs(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %alloca = alloca [16 x i32], align 4
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
  %cmp = icmp eq i32* null, %ptr0
  %zext = zext i1 %cmp to i32
  store volatile i32 %zext, i32 addrspace(1)* %out
  ret void
}

; The alloca-derived pointer is compared against a pointer returned by an
; external call. The expected output keeps the original alloca, GEP and icmp
; unchanged, i.e. the alloca is not replaced in this case.
; CHECK-LABEL: @lds_promoted_alloca_icmp_unknown_ptr(
; CHECK: %alloca = alloca [16 x i32], align 4
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
; CHECK: %ptr1 = call i32* @get_unknown_pointer()
; CHECK: %cmp = icmp eq i32* %ptr0, %ptr1
define amdgpu_kernel void @lds_promoted_alloca_icmp_unknown_ptr(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
  %alloca = alloca [16 x i32], align 4
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
  %ptr1 = call i32* @get_unknown_pointer()
  %cmp = icmp eq i32* %ptr0, %ptr1
  %zext = zext i1 %cmp to i32
  store volatile i32 %zext, i32 addrspace(1)* %out
  ret void
}

; External function with no body in this module; its result is opaque here.
declare i32* @get_unknown_pointer() #0

; NOTE(review): the 256 in the expected [256 x [16 x i32]] arrays presumably
; comes from the maximum flat work-group size set below - confirm against the
; pass before changing either value.
attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }