• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s
2
; One operand of the select is an undef pointer; the other is the alloca
; itself. The expected output still contains the plain alloca and the select
; on i32* operands, i.e. the same instructions as the input.
3; CHECK-LABEL: @lds_promoted_alloca_select_invalid_pointer_operand(
4; CHECK: %alloca = alloca i32
5; CHECK: select i1 undef, i32* undef, i32* %alloca
6define void @lds_promoted_alloca_select_invalid_pointer_operand() #0 {
7  %alloca = alloca i32, align 4
  ; The true operand is undef, not a pointer derived from %alloca.
8  %select = select i1 undef, i32* undef, i32* %alloca
9  store i32 0, i32* %select, align 4
10  ret void
11}
12
; Both select operands are GEPs into the same [16 x i32] alloca, indexed by
; the function arguments %a and %b. The expected output has the GEPs, the
; select and the store all operating on addrspace(3) pointers based on the
; global @lds_promote_alloca_select_two_derived_pointers.alloca.
13; CHECK-LABEL: @lds_promote_alloca_select_two_derived_pointers(
14; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promote_alloca_select_two_derived_pointers.alloca, i32 0, i32 %{{[0-9]+}}
15; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %a
16; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 %b
17; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
18; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
19define void @lds_promote_alloca_select_two_derived_pointers(i32 %a, i32 %b) #0 {
20  %alloca = alloca [16 x i32], align 4
  ; Two element pointers into the same alloca, with variable indices.
21  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
22  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
23  %select = select i1 undef, i32* %ptr0, i32* %ptr1
24  store i32 0, i32* %select, align 4
25  ret void
26}
27
28; FIXME: This should be promotable but requires knowing that both will be promoted first.
29
; The select chooses between pointers derived from two different allocas
; (%alloca0 and %alloca1). The expected output keeps both allocas, both GEPs
; and the select on plain i32* pointers, matching the input instructions.
30; CHECK-LABEL: @lds_promote_alloca_select_two_allocas(
31; CHECK: %alloca0 = alloca i32, i32 16, align 4
32; CHECK: %alloca1 = alloca i32, i32 16, align 4
33; CHECK: %ptr0 = getelementptr inbounds i32, i32* %alloca0, i32 %a
34; CHECK: %ptr1 = getelementptr inbounds i32, i32* %alloca1, i32 %b
35; CHECK: %select = select i1 undef, i32* %ptr0, i32* %ptr1
36define void @lds_promote_alloca_select_two_allocas(i32 %a, i32 %b) #0 {
37  %alloca0 = alloca i32, i32 16, align 4
38  %alloca1 = alloca i32, i32 16, align 4
  ; Each select operand comes from a different alloca.
39  %ptr0 = getelementptr inbounds i32, i32* %alloca0, i32 %a
40  %ptr1 = getelementptr inbounds i32, i32* %alloca1, i32 %b
41  %select = select i1 undef, i32* %ptr0, i32* %ptr1
42  store i32 0, i32* %select, align 4
43  ret void
44}
45
; Same shape as the two-derived-pointers case, but the GEP indices are the
; constants 1 and 3 instead of function arguments. The expected output has
; the GEPs, the select and the store on addrspace(3) pointers.
46; TODO: Maybe this should be canonicalized to select on the constant and GEP after.
47; CHECK-LABEL: @lds_promote_alloca_select_two_derived_constant_pointers(
48; CHECK: [[ARRAYGEP:%[0-9]+]] = getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promote_alloca_select_two_derived_constant_pointers.alloca, i32 0, i32 %{{[0-9]+}}
49; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 1
50; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* [[ARRAYGEP]], i32 0, i32 3
51; CHECK: %select = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
52; CHECK: store i32 0, i32 addrspace(3)* %select, align 4
53define void @lds_promote_alloca_select_two_derived_constant_pointers() #0 {
54  %alloca = alloca [16 x i32], align 4
  ; Constant element indices into the same alloca.
55  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 1
56  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 3
57  %select = select i1 undef, i32* %ptr0, i32* %ptr1
58  store i32 0, i32* %select, align 4
59  ret void
60}
61
; A select whose operand is itself a select: %select0 picks between two GEPs
; into the alloca, and %select1 picks between %select0 and a third GEP into
; the same alloca. The expected output has all three GEPs, both selects and
; the store on addrspace(3) pointers.
62; CHECK-LABEL: @lds_promoted_alloca_select_input_select(
63; CHECK: getelementptr inbounds [256 x [16 x i32]], [256 x [16 x i32]] addrspace(3)* @lds_promoted_alloca_select_input_select.alloca, i32 0, i32 %{{[0-9]+}}
64; CHECK: %ptr0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a
65; CHECK: %ptr1 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %b
66; CHECK: %ptr2 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %c
67; CHECK: %select0 = select i1 undef, i32 addrspace(3)* %ptr0, i32 addrspace(3)* %ptr1
68; CHECK: %select1 = select i1 undef, i32 addrspace(3)* %select0, i32 addrspace(3)* %ptr2
69; CHECK: store i32 0, i32 addrspace(3)* %select1, align 4
70define void @lds_promoted_alloca_select_input_select(i32 %a, i32 %b, i32 %c) #0 {
71  %alloca = alloca [16 x i32], align 4
72  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
73  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
74  %ptr2 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %c
75  %select0 = select i1 undef, i32* %ptr0, i32* %ptr1
  ; The result of the first select is an operand of the second one.
76  %select1 = select i1 undef, i32* %select0, i32* %ptr2
77  store i32 0, i32* %select1, align 4
78  ret void
79}
80
; A select fed by a phi: %phi.ptr merges a GEP into the alloca (%ptr0) with
; %select0, which itself has an undef pointer operand, and %select1 then
; chooses between that phi and another GEP into the same alloca.
; NOTE(review): no FileCheck patterns are visible for this function, so its
; output is not verified here. Presumably it only exercises the pass on this
; pattern (e.g. guards against a crash) -- confirm, or add expected output.
81define void @lds_promoted_alloca_select_input_phi(i32 %a, i32 %b, i32 %c) #0 {
82entry:
83  %alloca = alloca [16 x i32], align 4
84  %ptr0 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
85  %ptr1 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %b
86  store i32 0, i32* %ptr0
87  br i1 undef, label %bb1, label %bb2
88
89bb1:
90  %ptr2 = getelementptr inbounds [16 x i32], [16 x i32]* %alloca, i32 0, i32 %c
  ; One operand is undef; the other is derived from the alloca.
91  %select0 = select i1 undef, i32* undef, i32* %ptr2
92  store i32 0, i32* %ptr1
93  br label %bb2
94
95bb2:
  ; Incoming values: a GEP from the entry block and the select from %bb1.
96  %phi.ptr = phi i32* [ %ptr0, %entry ], [ %select0, %bb1 ]
97  %select1 = select i1 undef, i32* %phi.ptr, i32* %ptr1
98  store i32 0, i32* %select1, align 4
99  ret void
100}
101
; Select between a pointer to the alloca (true operand) and a null pointer
; (false operand). The expected output has no alloca before the select, and
; the select operates on an addrspace(3) pointer with an addrspace(3) null on
; the right-hand side.
102; CHECK-LABEL: @select_null_rhs(
103; CHECK-NOT: alloca
104; CHECK: select i1 %tmp2, double addrspace(3)* %{{[0-9]+}}, double addrspace(3)* null
105define void @select_null_rhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
106bb:
107  %tmp = alloca double, align 8
108  store double 0.000000e+00, double* %tmp, align 8
109  %tmp2 = icmp eq i32 %arg1, 0
  ; null is the false (right-hand) operand here.
110  %tmp3 = select i1 %tmp2, double* %tmp, double* null
111  store double 1.000000e+00, double* %tmp3, align 8
  ; Read the alloca back directly and write the value out through %arg.
112  %tmp4 = load double, double* %tmp, align 8
113  store double %tmp4, double addrspace(1)* %arg
114  ret void
115}
116
; Mirror of the null-on-the-right case: the null pointer is the true operand
; and the pointer to the alloca is the false operand. The expected output has
; no alloca before the select, and the select operates on an addrspace(3)
; null on the left-hand side and an addrspace(3) pointer on the right.
117; CHECK-LABEL: @select_null_lhs(
118; CHECK-NOT: alloca
119; CHECK: select i1 %tmp2, double addrspace(3)* null, double addrspace(3)* %{{[0-9]+}}
120define void @select_null_lhs(double addrspace(1)* nocapture %arg, i32 %arg1) #1 {
121bb:
122  %tmp = alloca double, align 8
123  store double 0.000000e+00, double* %tmp, align 8
124  %tmp2 = icmp eq i32 %arg1, 0
  ; null is the true (left-hand) operand here.
125  %tmp3 = select i1 %tmp2, double* null, double* %tmp
126  store double 1.000000e+00, double* %tmp3, align 8
  ; Read the alloca back directly and write the value out through %arg.
127  %tmp4 = load double, double* %tmp, align 8
128  store double %tmp4, double addrspace(1)* %arg
129  ret void
130}
131
; Attribute group #0 is referenced by the lds_promote* functions in this file
; and additionally sets "amdgpu-max-waves-per-eu" to "1"; group #1 is
; referenced by the select_null_* functions and carries only
; norecurse/nounwind.
132attributes #0 = { norecurse nounwind "amdgpu-max-waves-per-eu"="1" }
133attributes #1 = { norecurse nounwind }