; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s

; Exercises the promote-alloca pass on addrspace(5) allocas whose addresses
; flow through `select` instructions. Each kernel states, through its
; FileCheck lines, whether the alloca is expected to be rewritten to an
; addrspace(3) global or left in place.

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

; One select operand is undef rather than a pointer derived from the alloca.
; The checks expect the alloca to survive the pass and the select to keep
; operating on addrspace(5) pointers.
; CHECK-LABEL: @lds_promoted_alloca_select_invalid_pointer_operand(
; CHECK: %alloca = alloca i32
; CHECK: select i1 undef, i32 addrspace(5)* undef, i32 addrspace(5)* %alloca
define amdgpu_kernel void @lds_promoted_alloca_select_invalid_pointer_operand() #0 {
  %alloca = alloca i32, align 4, addrspace(5)
  %select = select i1 undef, i32 addrspace(5)* undef, i32 addrspace(5)* %alloca
  store i32 0, i32 addrspace(5)* %select, align 4
  ret void
}

; Both select operands are GEPs off the same alloca. The checks expect the
; GEPs, the select and the store to be rewritten to addrspace(3), based on an
; element of a `[256 x [16 x i32]]` global named after the function and the
; alloca and indexed by an unnamed value.
; NOTE(review): the 256 presumably tracks the upper bound of
; "amdgpu-flat-work-group-size" in attribute group #0 - confirm.
; CHECK-LABEL: @lds_promote_alloca_select_two_derived_pointers(
; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promote_alloca_select_two_derived_pointers.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %b
; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
define amdgpu_kernel void @lds_promote_alloca_select_two_derived_pointers(i32 %a, i32 %b) #0 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %b
  %select = select i1 undef, i32 addrspace(5)* %ptr0, i32 addrspace(5)* %ptr1
  store i32 0, i32 addrspace(5)* %select, align 4
  ret void
}

; FIXME: This should be promotable but requires knowing that both will be promoted first.

; The select operands derive from two different allocas. The checks expect
; both allocas, both GEPs and the select to remain in addrspace(5).
; CHECK-LABEL: @lds_promote_alloca_select_two_allocas(
; CHECK: %alloca0 = alloca i32, i32 16, align 4
; CHECK: %alloca1 = alloca i32, i32 16, align 4
; CHECK: %ptr0 = getelementptr inbounds i32, i32 addrspace(5)* %alloca0, i32 %a
; CHECK: %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %alloca1, i32 %b
; CHECK: %select = select i1 undef, i32 addrspace(5)* %ptr0, i32 addrspace(5)* %ptr1
define amdgpu_kernel void @lds_promote_alloca_select_two_allocas(i32 %a, i32 %b) #0 {
  %alloca0 = alloca i32, i32 16, align 4, addrspace(5)
  %alloca1 = alloca i32, i32 16, align 4, addrspace(5)
  %ptr0 = getelementptr inbounds i32, i32 addrspace(5)* %alloca0, i32 %a
  %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %alloca1, i32 %b
  %select = select i1 undef, i32 addrspace(5)* %ptr0, i32 addrspace(5)* %ptr1
  store i32 0, i32 addrspace(5)* %select, align 4
  ret void
}

; TODO: Maybe this should be canonicalized to select on the constant and GEP after.
; Both select operands are GEPs off the same alloca with constant indices
; (1 and 3). The checks expect the GEPs, the select and the store to be
; rewritten to addrspace(3) with the constant indices preserved.
; CHECK-LABEL: @lds_promote_alloca_select_two_derived_constant_pointers(
; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promote_alloca_select_two_derived_constant_pointers.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 1
; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 3
; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
define amdgpu_kernel void @lds_promote_alloca_select_two_derived_constant_pointers() #0 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1
  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 3
  %select = select i1 undef, i32 addrspace(5)* %ptr0, i32 addrspace(5)* %ptr1
  store i32 0, i32 addrspace(5)* %select, align 4
  ret void
}

; FIXME: Can be promoted, but we'd have to recursively show that the select
; operands all point to the same alloca.

; One operand of %select1 is itself a select (%select0) over two GEPs off the
; same alloca. The only expectation is that an alloca is still present after
; the pass.
; CHECK-LABEL: @lds_promoted_alloca_select_input_select(
; CHECK: alloca
define amdgpu_kernel void @lds_promoted_alloca_select_input_select(i32 %a, i32 %b, i32 %c, i1 %c1, i1 %c2) #0 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %b
  %ptr2 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %c
  %select0 = select i1 %c1, i32 addrspace(5)* %ptr0, i32 addrspace(5)* %ptr1
  %select1 = select i1 %c2, i32 addrspace(5)* %select0, i32 addrspace(5)* %ptr2
  store i32 0, i32 addrspace(5)* %select1, align 4
  ret void
}

; %select1 takes a phi as one operand; the phi merges a GEP off the alloca
; (%ptr0) with a select that has an undef pointer operand (%select0).
; No output is verified for this kernel beyond its label.
; NOTE(review): presumably it only has to get through the pass without
; crashing - confirm.
; The label also closes the FileCheck region of the preceding kernel, so
; that kernel's bare `alloca` expectation cannot be satisfied by the alloca
; in this one.
; CHECK-LABEL: @lds_promoted_alloca_select_input_phi(
define amdgpu_kernel void @lds_promoted_alloca_select_input_phi(i32 %a, i32 %b, i32 %c) #0 {
entry:
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a
  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %b
  store i32 0, i32 addrspace(5)* %ptr0
  br i1 undef, label %bb1, label %bb2

bb1:
  %ptr2 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %c
  %select0 = select i1 undef, i32 addrspace(5)* undef, i32 addrspace(5)* %ptr2
  store i32 0, i32 addrspace(5)* %ptr1
  br label %bb2

bb2:
  %phi.ptr = phi i32 addrspace(5)* [ %ptr0, %entry ], [ %select0, %bb1 ]
  %select1 = select i1 undef, i32 addrspace(5)* %phi.ptr, i32 addrspace(5)* %ptr1
  store i32 0, i32 addrspace(5)* %select1, align 4
  ret void
}

; Select between the alloca (true side) and a null pointer (false side).
; The checks expect no alloca to remain and the select to operate on
; addrspace(3) pointers with null kept as the second operand.
; This kernel uses attribute group #1 rather than #0.
; CHECK-LABEL: @select_null_rhs(
; CHECK-NOT: alloca
; CHECK: select i1 %tmp2, double addrspace(3)* %{{[0-9]+}}, double addrspace(3)* null
define amdgpu_kernel void @select_null_rhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
bb:
  %tmp = alloca double, align 8, addrspace(5)
  store double 0.000000e+00, double addrspace(5)* %tmp, align 8
  %tmp2 = icmp eq i32 %arg1, 0
  %tmp3 = select i1 %tmp2, double addrspace(5)* %tmp, double addrspace(5)* null
  store double 1.000000e+00, double addrspace(5)* %tmp3, align 8
  %tmp4 = load double, double addrspace(5)* %tmp, align 8
  store double %tmp4, double addrspace(1)* %arg
  ret void
}

; Select between a null pointer (true side) and the alloca (false side).
; The checks expect no alloca to remain and the select to operate on
; addrspace(3) pointers with null kept as the first operand.
; This kernel uses attribute group #1 rather than #0.
; CHECK-LABEL: @select_null_lhs(
; CHECK-NOT: alloca
; CHECK: select i1 %tmp2, double addrspace(3)* null, double addrspace(3)* %{{[0-9]+}}
define amdgpu_kernel void @select_null_lhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
bb:
  %tmp = alloca double, align 8, addrspace(5)
  store double 0.000000e+00, double addrspace(5)* %tmp, align 8
  %tmp2 = icmp eq i32 %arg1, 0
  %tmp3 = select i1 %tmp2, double addrspace(5)* null, double addrspace(5)* %tmp
  store double 1.000000e+00, double addrspace(5)* %tmp3, align 8
  %tmp4 = load double, double addrspace(5)* %tmp, align 8
  store double %tmp4, double addrspace(1)* %arg
  ret void
}

; #0 adds explicit "amdgpu-waves-per-eu" and "amdgpu-flat-work-group-size"
; settings on top of norecurse/nounwind; #1 carries only norecurse/nounwind.
attributes #0 = { norecurse nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
attributes #1 = { norecurse nounwind }
