; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s

; One operand of the select is an undef pointer. The expectations below require
; the alloca, and the select that uses it directly, to still be present in the
; output.
; CHECK-LABEL: @lds_promoted_alloca_select_invalid_pointer_operand(
; CHECK: %alloca = alloca i32
; CHECK: select i1 undef, i32* undef, i32* %alloca
define void @lds_promoted_alloca_select_invalid_pointer_operand() #0 {
  %alloca = alloca i32, align 4
  %select = select i1 undef, i32* undef, i32* %alloca
  store i32 0, i32* %select, align 4
  ret void
}

; Both select operands are GEPs (variable indices %a and %b) off the same
; alloca. The expectations below require the GEPs, the select and the store to
; be rewritten to addrspace(3) pointers based on a single GEP into the
; @lds_promote_alloca_select_two_derived_pointers.alloca global.
; CHECK-LABEL: @lds_promote_alloca_select_two_derived_pointers(
; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promote_alloca_select_two_derived_pointers.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %b
; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
define void @lds_promote_alloca_select_two_derived_pointers(i32 %a, i32 %b) #0 {
  %alloca = alloca [16 x i32], align 4
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
  %select = select i1 undef, i32* %ptr0, i32* %ptr1
  store i32 0, i32* %select, align 4
  ret void
}

; FIXME: This should be promotable but requires knowing that both will be promoted first.
; The select operands are GEPs off two different allocas. The expectations
; below require both allocas, both GEPs and the select to remain on plain
; (non-addrspace(3)) pointers.
; CHECK-LABEL: @lds_promote_alloca_select_two_allocas(
; CHECK: %alloca0 = alloca i32, i32 16, align 4
; CHECK: %alloca1 = alloca i32, i32 16, align 4
; CHECK: %ptr0 = getelementptr inbounds i32, i32* %alloca0, i32 %a
; CHECK: %ptr1 = getelementptr inbounds i32, i32* %alloca1, i32 %b
; CHECK: %select = select i1 undef, i32* %ptr0, i32* %ptr1
define void @lds_promote_alloca_select_two_allocas(i32 %a, i32 %b) #0 {
  %alloca0 = alloca i32, i32 16, align 4
  %alloca1 = alloca i32, i32 16, align 4
  %ptr0 = getelementptr inbounds i32, i32* %alloca0, i32 %a
  %ptr1 = getelementptr inbounds i32, i32* %alloca1, i32 %b
  %select = select i1 undef, i32* %ptr0, i32* %ptr1
  store i32 0, i32* %select, align 4
  ret void
}

; Same shape as the two-derived-pointers case, but the GEP indices are the
; constants 1 and 3 instead of function arguments.
; TODO: Maybe this should be canonicalized to select on the constant and GEP after.
; CHECK-LABEL: @lds_promote_alloca_select_two_derived_constant_pointers(
; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promote_alloca_select_two_derived_constant_pointers.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 1
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 3
; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
define void @lds_promote_alloca_select_two_derived_constant_pointers() #0 {
  %alloca = alloca [16 x i32], align 4
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 1
  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 3
  %select = select i1 undef, i32* %ptr0, i32* %ptr1
  store i32 0, i32* %select, align 4
  ret void
}

; A select whose first operand is itself a select; all three leaf pointers are
; GEPs off the same alloca. The expectations below require every GEP, both
; selects and the store to be rewritten to addrspace(3) pointers.
; CHECK-LABEL: @lds_promoted_alloca_select_input_select(
; CHECK: getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_select_input_select.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %b
; CHECK: %ptr2 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %c
; CHECK: %select0 = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
; CHECK: %select1 = select i1 undef, i32 addrspace(3)* %select0, i32 addrspace(3)* %ptr2
; CHECK: store i32 0, i32 addrspace(3)* %select1, align 4
define void @lds_promoted_alloca_select_input_select(i32 %a, i32 %b, i32 %c) #0 {
  %alloca = alloca [16 x i32], align 4
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
  %ptr2 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %c
  %select0 = select i1 undef, i32* %ptr0, i32* %ptr1
  %select1 = select i1 undef, i32* %select0, i32* %ptr2
  store i32 0, i32* %select1, align 4
  ret void
}

; A select fed by a phi whose incoming values are a GEP off the alloca and
; another select that has an undef pointer operand. No expectations on the body
; are stated for this function; the label below only anchors it so that the
; preceding function's expectations cannot match text from this one.
; CHECK-LABEL: @lds_promoted_alloca_select_input_phi(
define void @lds_promoted_alloca_select_input_phi(i32 %a, i32 %b, i32 %c) #0 {
entry:
  %alloca = alloca [16 x i32], align 4
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
  store i32 0, i32* %ptr0
  br i1 undef, label %bb1, label %bb2

bb1:
  %ptr2 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %c
  %select0 = select i1 undef, i32* undef, i32* %ptr2
  store i32 0, i32* %ptr1
  br label %bb2

bb2:
  %phi.ptr = phi i32* [ %ptr0, %entry ], [ %select0, %bb1 ]
  %select1 = select i1 undef, i32* %phi.ptr, i32* %ptr1
  store i32 0, i32* %select1, align 4
  ret void
}

; Select between the alloca and a null pointer, with null as the second
; operand. The expectations below require that no alloca appears before a
; select on addrspace(3) pointers that keeps null in the same position.
; CHECK-LABEL: @select_null_rhs(
; CHECK-NOT: alloca
; CHECK: select i1 %tmp2, double addrspace(3)* %{{[0-9]+}}, double addrspace(3)* null
define void @select_null_rhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
bb:
  %tmp = alloca double, align 8
  store double 0.000000e+00, double* %tmp, align 8
  %tmp2 = icmp eq i32 %arg1, 0
  %tmp3 = select i1 %tmp2, double* %tmp, double* null
  store double 1.000000e+00, double* %tmp3, align 8
  %tmp4 = load double, double* %tmp, align 8
  store double %tmp4, double addrspace(1)* %arg
  ret void
}

; Mirror of @select_null_rhs with null as the first select operand.
; CHECK-LABEL: @select_null_lhs(
; CHECK-NOT: alloca
; CHECK: select i1 %tmp2, double addrspace(3)* null, double addrspace(3)* %{{[0-9]+}}
define void @select_null_lhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
bb:
  %tmp = alloca double, align 8
  store double 0.000000e+00, double* %tmp, align 8
  %tmp2 = icmp eq i32 %arg1, 0
  %tmp3 = select i1 %tmp2, double* null, double* %tmp
  store double 1.000000e+00, double* %tmp3, align 8
  %tmp4 = load double, double* %tmp, align 8
  store double %tmp4, double addrspace(1)* %arg
  ret void
}

; #0 additionally sets "amdgpu-max-waves-per-eu"="1"; #1 is the same without it.
attributes #0 = { norecurse nounwind "amdgpu-max-waves-per-eu"="1" }
attributes #1 = { norecurse nounwind }