; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s

; Exercises the promote-alloca pass on private (addrspace(5)) allocas whose
; address reaches its users through select instructions. Each function below
; records whether the alloca is expected to stay in place or to be rewritten
; into an addrspace(3) global indexed per work item.

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

; One select operand is undef rather than a pointer derived from the alloca,
; so the alloca is expected to be left untouched.
; CHECK-LABEL: @lds_promoted_alloca_select_invalid_pointer_operand(
; CHECK: %alloca = alloca i32
; CHECK: select i1 undef, i32 addrspace(5)* undef, i32 addrspace(5)* %alloca
define amdgpu_kernel void @lds_promoted_alloca_select_invalid_pointer_operand() #0 {
  %alloca = alloca i32, align 4, addrspace(5)
  %select = select i1 undef, i32 addrspace(5)* undef, i32 addrspace(5)* %alloca
  store i32 0, i32 addrspace(5)* %select, align 4
  ret void
}

; Both select operands are GEPs off the same alloca; the alloca, both GEPs,
; the select and the store are all expected to move to addrspace(3).
; CHECK-LABEL: @lds_promote_alloca_select_two_derived_pointers(
; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promote_alloca_select_two_derived_pointers.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %b
; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
define amdgpu_kernel void @lds_promote_alloca_select_two_derived_pointers(i32 %a, i32 %b) #0 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %b
  %select = select i1 undef, i32 addrspace(5)* %ptr0, i32 addrspace(5)* %ptr1
  store i32 0, i32 addrspace(5)* %select, align 4
  ret void
}

; FIXME: This should be promotable but requires knowing that both will be promoted first.

; The select operands come from two different allocas; both are expected to
; remain in addrspace(5).
; CHECK-LABEL: @lds_promote_alloca_select_two_allocas(
; CHECK: %alloca0 = alloca i32, i32 16, align 4
; CHECK: %alloca1 = alloca i32, i32 16, align 4
; CHECK: %ptr0 = getelementptr inbounds i32, i32 addrspace(5)* %alloca0, i32 %a
; CHECK: %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %alloca1, i32 %b
; CHECK: %select = select i1 undef, i32 addrspace(5)* %ptr0, i32 addrspace(5)* %ptr1
define amdgpu_kernel void @lds_promote_alloca_select_two_allocas(i32 %a, i32 %b) #0 {
  %alloca0 = alloca i32, i32 16, align 4, addrspace(5)
  %alloca1 = alloca i32, i32 16, align 4, addrspace(5)
  %ptr0 = getelementptr inbounds i32, i32 addrspace(5)* %alloca0, i32 %a
  %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %alloca1, i32 %b
  %select = select i1 undef, i32 addrspace(5)* %ptr0, i32 addrspace(5)* %ptr1
  store i32 0, i32 addrspace(5)* %select, align 4
  ret void
}

; Same shape as the two-derived-pointers case, but with constant GEP indices.
; TODO: Maybe this should be canonicalized to select on the constant and GEP after.
; CHECK-LABEL: @lds_promote_alloca_select_two_derived_constant_pointers(
; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promote_alloca_select_two_derived_constant_pointers.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 1
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 3
; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
define amdgpu_kernel void @lds_promote_alloca_select_two_derived_constant_pointers() #0 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1
  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 3
  %select = select i1 undef, i32 addrspace(5)* %ptr0, i32 addrspace(5)* %ptr1
  store i32 0, i32 addrspace(5)* %select, align 4
  ret void
}

; FIXME: Can be promoted, but we'd have to recursively show that the select
; operands all point to the same alloca.

; The pattern below names the alloca instruction itself. A bare "alloca"
; pattern would also match the ".alloca" suffix of a promoted global's name and
; therefore could not tell the promoted and unpromoted outcomes apart.
; CHECK-LABEL: @lds_promoted_alloca_select_input_select(
; CHECK: %alloca = alloca [16 x i32]
define amdgpu_kernel void @lds_promoted_alloca_select_input_select(i32 %a, i32 %b, i32 %c, i1 %c1, i1 %c2) #0 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %b
  %ptr2 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %c
  %select0 = select i1 %c1, i32 addrspace(5)* %ptr0, i32 addrspace(5)* %ptr1
  %select1 = select i1 %c2, i32 addrspace(5)* %select0, i32 addrspace(5)* %ptr2
  store i32 0, i32 addrspace(5)* %select1, align 4
  ret void
}

; A select fed by a phi that itself merges a GEP and another select.
; NOTE(review): no expectations on the transformed body are recorded for this
; function; presumably it only guards against the pass crashing - confirm.
; The label below only fences this body off so that patterns belonging to the
; previous function cannot be satisfied by instructions in this one.
; CHECK-LABEL: @lds_promoted_alloca_select_input_phi(
define amdgpu_kernel void @lds_promoted_alloca_select_input_phi(i32 %a, i32 %b, i32 %c) #0 {
entry:
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %b
  store i32 0, i32 addrspace(5)* %ptr0
  br i1 undef, label %bb1, label %bb2

bb1:
  %ptr2 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %c
  %select0 = select i1 undef, i32 addrspace(5)* undef, i32 addrspace(5)* %ptr2
  store i32 0, i32 addrspace(5)* %ptr1
  br label %bb2

bb2:
  %phi.ptr = phi i32 addrspace(5)* [ %ptr0, %entry ], [ %select0, %bb1 ]
  %select1 = select i1 undef, i32 addrspace(5)* %phi.ptr, i32 addrspace(5)* %ptr1
  store i32 0, i32 addrspace(5)* %select1, align 4
  ret void
}

; A null pointer as the false operand of the select: the alloca is expected to
; be promoted and the null operand retyped to addrspace(3).
; CHECK-LABEL: @select_null_rhs(
; CHECK-NOT: alloca
; CHECK: select i1 %tmp2, double addrspace(3)* %{{[0-9]+}}, double addrspace(3)* null
define amdgpu_kernel void @select_null_rhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
bb:
  %tmp = alloca double, align 8, addrspace(5)
  store double 0.000000e+00, double addrspace(5)* %tmp, align 8
  %tmp2 = icmp eq i32 %arg1, 0
  %tmp3 = select i1 %tmp2, double addrspace(5)* %tmp, double addrspace(5)* null
  store double 1.000000e+00, double addrspace(5)* %tmp3, align 8
  %tmp4 = load double, double addrspace(5)* %tmp, align 8
  store double %tmp4, double addrspace(1)* %arg
  ret void
}

; Mirror of the previous case with the null pointer as the true operand.
; CHECK-LABEL: @select_null_lhs(
; CHECK-NOT: alloca
; CHECK: select i1 %tmp2, double addrspace(3)* null, double addrspace(3)* %{{[0-9]+}}
define amdgpu_kernel void @select_null_lhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
bb:
  %tmp = alloca double, align 8, addrspace(5)
  store double 0.000000e+00, double addrspace(5)* %tmp, align 8
  %tmp2 = icmp eq i32 %arg1, 0
  %tmp3 = select i1 %tmp2, double addrspace(5)* null, double addrspace(5)* %tmp
  store double 1.000000e+00, double addrspace(5)* %tmp3, align 8
  %tmp4 = load double, double addrspace(5)* %tmp, align 8
  store double %tmp4, double addrspace(1)* %arg
  ret void
}

attributes #0 = { norecurse nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
attributes #1 = { norecurse nounwind }