; Regression test for the load-store vectorizer on the amdgcn-amd-amdhsa
; triple. Each function below pairs adjacent memory accesses whose element
; types mix pointers, integers and doubles, and the FileCheck patterns in
; front of it describe the IR expected after the pass has run.
;
; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s

; Pointer widths that matter for the tests, as declared by the layout string:
; the default address space and address spaces 3 and 5 use 32-bit pointers;
; address spaces 1, 2, 4 and 24 use 64-bit pointers.
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"

; NOTE(review): no function in the visible part of this file calls this
; intrinsic; it is kept so the module stays unchanged.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; Two adjacent loads and two adjacent stores of 64-bit (address space 1)
; pointer values. Expected result: one <2 x i64> load whose lanes are turned
; back into pointers with inttoptr, and one <2 x i64> zeroinitializer store
; replacing the two null stores.
; CHECK-LABEL: @merge_v2p1i8(
; CHECK: load <2 x i64>
; CHECK: inttoptr i64 %{{[0-9]+}} to i8 addrspace(1)*
; CHECK: inttoptr i64 %{{[0-9]+}} to i8 addrspace(1)*
; CHECK: store <2 x i64> zeroinitializer
define void @merge_v2p1i8(i8 addrspace(1)* addrspace(1)* nocapture %a, i8 addrspace(1)* addrspace(1)* nocapture readonly %b) #0 {
entry:
  ; Second pointer-sized slot of each buffer.
  %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1
  %b.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b, i64 1

  ; The loaded values are intentionally unused; only the access pattern matters.
  %ld.c = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b, align 4
  %ld.c.idx.1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b.1, align 4

  store i8 addrspace(1)* null, i8 addrspace(1)* addrspace(1)* %a, align 4
  store i8 addrspace(1)* null, i8 addrspace(1)* addrspace(1)* %a.1, align 4

  ret void
}

; Same shape as @merge_v2p1i8 but with 32-bit (address space 3) pointers.
; Expected result: one <2 x i32> load with inttoptr on each lane and one
; <2 x i32> zeroinitializer store.
; CHECK-LABEL: @merge_v2p3i8(
; CHECK: load <2 x i32>
; CHECK: inttoptr i32 %{{[0-9]+}} to i8 addrspace(3)*
; CHECK: inttoptr i32 %{{[0-9]+}} to i8 addrspace(3)*
; CHECK: store <2 x i32> zeroinitializer
define void @merge_v2p3i8(i8 addrspace(3)* addrspace(3)* nocapture %a, i8 addrspace(3)* addrspace(3)* nocapture readonly %b) #0 {
entry:
  ; Second pointer-sized slot of each buffer (note the i64 index on a
  ; 32-bit address space).
  %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a, i64 1
  %b.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b, i64 1

  ; The loaded values are intentionally unused; only the access pattern matters.
  %ld.c = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b, align 4
  %ld.c.idx.1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b.1, align 4

  store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(3)* %a, align 4
  store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(3)* %a.1, align 4

  ret void
}

; Load of an i64 at slot 0 followed by a load of a 64-bit pointer at slot 1.
; Expected result: a single <2 x i64> load; lane 1 is extracted and converted
; to the pointer type with inttoptr.
; CHECK-LABEL: @merge_load_i64_ptr64(
; CHECK: load <2 x i64>
; CHECK: [[ELT1:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 1
; CHECK: inttoptr i64 [[ELT1]] to i8 addrspace(1)*
define void @merge_load_i64_ptr64(i64 addrspace(1)* nocapture %a) #0 {
entry:
  ; Slot 1 of %a, reinterpreted as a slot holding a pointer.
  %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
  %a.1.cast = bitcast i64 addrspace(1)* %a.1 to i8 addrspace(1)* addrspace(1)*

  %ld.0 = load i64, i64 addrspace(1)* %a
  %ld.1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.1.cast

  ret void
}

; Load of a 64-bit pointer at slot 0 followed by a load of an i64 at slot 1.
; Expected result: a single <2 x i64> load; lane 0 is extracted and converted
; to the pointer type with inttoptr.
; CHECK-LABEL: @merge_load_ptr64_i64(
; CHECK: load <2 x i64>
; CHECK: [[ELT0:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 0
; CHECK: inttoptr i64 [[ELT0]] to i8 addrspace(1)*
define void @merge_load_ptr64_i64(i64 addrspace(1)* nocapture %a) #0 {
entry:
  ; Slot 0 of %a, reinterpreted as a slot holding a pointer.
  %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
  %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1

  %ld.0 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.cast
  %ld.1 = load i64, i64 addrspace(1)* %a.1

  ret void
}

; Store of a 64-bit pointer at slot 0 followed by a store of an i64 at slot 1.
; Expected result: %ptr0 is converted with ptrtoint, placed in lane 0 of a
; <2 x i64>, and the pair is written with one vector store.
; CHECK-LABEL: @merge_store_ptr64_i64(
; CHECK: [[ELT0:%[0-9]+]] = ptrtoint i8 addrspace(1)* %ptr0 to i64
; CHECK: insertelement <2 x i64> undef, i64 [[ELT0]], i32 0
; CHECK: store <2 x i64>
define void @merge_store_ptr64_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, i64 %val1) #0 {
entry:
  ; Slot 0 of %a, reinterpreted as a slot holding a pointer.
  %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
  %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1

  store i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* %a.cast
  store i64 %val1, i64 addrspace(1)* %a.1

  ret void
}

; Store of an i64 at slot 0 followed by a store of a 64-bit pointer at slot 1.
; Expected result: %ptr1 is converted with ptrtoint, placed in lane 1 of a
; <2 x i64>, and the pair is written with one vector store.
; CHECK-LABEL: @merge_store_i64_ptr64(
; CHECK: [[ELT1:%[0-9]+]] = ptrtoint i8 addrspace(1)* %ptr1 to i64
; CHECK: insertelement <2 x i64> %{{[0-9]+}}, i64 [[ELT1]], i32 1
; CHECK: store <2 x i64>
define void @merge_store_i64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(1)* %ptr1) #0 {
entry:
  %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1
  ; Slot 0 of %a, reinterpreted as a slot holding an i64.
  %a.cast = bitcast i8 addrspace(1)* addrspace(1)* %a to i64 addrspace(1)*

  store i64 %val0, i64 addrspace(1)* %a.cast
  store i8 addrspace(1)* %ptr1, i8 addrspace(1)* addrspace(1)* %a.1

  ret void
}

; 32-bit counterpart of @merge_load_i64_ptr64: an i32 load at slot 0 and a
; load of a 32-bit (address space 3) pointer at slot 1.
; Expected result: a single <2 x i32> load; lane 1 is extracted and converted
; to the pointer type with inttoptr.
; CHECK-LABEL: @merge_load_i32_ptr32(
; CHECK: load <2 x i32>
; CHECK: [[ELT1:%[0-9]+]] = extractelement <2 x i32> %{{[0-9]+}}, i32 1
; CHECK: inttoptr i32 [[ELT1]] to i8 addrspace(3)*
define void @merge_load_i32_ptr32(i32 addrspace(3)* nocapture %a) #0 {
entry:
  ; Slot 1 of %a, reinterpreted as a slot holding a pointer.
  %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1
  %a.1.cast = bitcast i32 addrspace(3)* %a.1 to i8 addrspace(3)* addrspace(3)*

  %ld.0 = load i32, i32 addrspace(3)* %a
  %ld.1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a.1.cast

  ret void
}

; 32-bit counterpart of @merge_load_ptr64_i64: a load of a 32-bit (address
; space 3) pointer at slot 0 and an i32 load at slot 1.
; Expected result: a single <2 x i32> load; lane 0 is extracted and converted
; to the pointer type with inttoptr.
; CHECK-LABEL: @merge_load_ptr32_i32(
; CHECK: load <2 x i32>
; CHECK: [[ELT0:%[0-9]+]] = extractelement <2 x i32> %{{[0-9]+}}, i32 0
; CHECK: inttoptr i32 [[ELT0]] to i8 addrspace(3)*
define void @merge_load_ptr32_i32(i32 addrspace(3)* nocapture %a) #0 {
entry:
  ; Slot 0 of %a, reinterpreted as a slot holding a pointer.
  %a.cast = bitcast i32 addrspace(3)* %a to i8 addrspace(3)* addrspace(3)*
  %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1

  %ld.0 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a.cast
  %ld.1 = load i32, i32 addrspace(3)* %a.1

  ret void
}

; 32-bit counterpart of @merge_store_ptr64_i64: a store of a 32-bit (address
; space 3) pointer at slot 0 and an i32 store at slot 1.
; Expected result: %ptr0 is converted with ptrtoint, placed in lane 0 of a
; <2 x i32>, and the pair is written with one vector store.
; CHECK-LABEL: @merge_store_ptr32_i32(
; CHECK: [[ELT0:%[0-9]+]] = ptrtoint i8 addrspace(3)* %ptr0 to i32
; CHECK: insertelement <2 x i32> undef, i32 [[ELT0]], i32 0
; CHECK: store <2 x i32>
define void @merge_store_ptr32_i32(i32 addrspace(3)* nocapture %a, i8 addrspace(3)* %ptr0, i32 %val1) #0 {
entry:
  ; Slot 0 of %a, reinterpreted as a slot holding a pointer.
  %a.cast = bitcast i32 addrspace(3)* %a to i8 addrspace(3)* addrspace(3)*
  %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1

  store i8 addrspace(3)* %ptr0, i8 addrspace(3)* addrspace(3)* %a.cast
  store i32 %val1, i32 addrspace(3)* %a.1

  ret void
}

; 32-bit counterpart of @merge_store_i64_ptr64: an i32 store at slot 0 and a
; store of a 32-bit (address space 3) pointer at slot 1.
; Expected result: %ptr1 is converted with ptrtoint, placed in lane 1 of a
; <2 x i32>, and the pair is written with one vector store.
; CHECK-LABEL: @merge_store_i32_ptr32(
; CHECK: [[ELT1:%[0-9]+]] = ptrtoint i8 addrspace(3)* %ptr1 to i32
; CHECK: insertelement <2 x i32> %{{[0-9]+}}, i32 [[ELT1]], i32 1
; CHECK: store <2 x i32>
define void @merge_store_i32_ptr32(i8 addrspace(3)* addrspace(3)* nocapture %a, i32 %val0, i8 addrspace(3)* %ptr1) #0 {
entry:
  %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a, i32 1
  ; Slot 0 of %a, reinterpreted as a slot holding an i32.
  %a.cast = bitcast i8 addrspace(3)* addrspace(3)* %a to i32 addrspace(3)*

  store i32 %val0, i32 addrspace(3)* %a.cast
  store i8 addrspace(3)* %ptr1, i8 addrspace(3)* addrspace(3)* %a.1

  ret void
}

; Negative test: a 32-bit (address space 3) pointer value is stored at the
; start of %a and an i64 is stored at i64 slot 1, both in address space 1
; memory. The two stored values have different widths (32 vs 64 bits per the
; datalayout), and both stores are expected to remain scalar, in this order.
; CHECK-LABEL: @no_merge_store_ptr32_i64(
; CHECK: store i8 addrspace(3)*
; CHECK: store i64
define void @no_merge_store_ptr32_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(3)* %ptr0, i64 %val1) #0 {
entry:
  ; Start of %a viewed as a slot holding an address-space-3 pointer.
  %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(3)* addrspace(1)*
  %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1

  store i8 addrspace(3)* %ptr0, i8 addrspace(3)* addrspace(1)* %a.cast
  store i64 %val1, i64 addrspace(1)* %a.1

  ret void
}

; Negative test: an i64 is stored at the start of %a and a 32-bit (address
; space 3) pointer value is stored at pointer slot 1, both in address space 1
; memory. The two stored values have different widths, and both stores are
; expected to remain scalar, in this order.
; CHECK-LABEL: @no_merge_store_i64_ptr32(
; CHECK: store i64
; CHECK: store i8 addrspace(3)*
define void @no_merge_store_i64_ptr32(i8 addrspace(3)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(3)* %ptr1) #0 {
entry:
  %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a, i64 1
  ; Start of %a viewed as a slot holding an i64.
  %a.cast = bitcast i8 addrspace(3)* addrspace(1)* %a to i64 addrspace(1)*

  store i64 %val0, i64 addrspace(1)* %a.cast
  store i8 addrspace(3)* %ptr1, i8 addrspace(3)* addrspace(1)* %a.1

  ret void
}

; Negative test: an i64 load at slot 0 followed by a load of a 32-bit
; (address space 3) pointer value from i64 slot 1 of address space 1 memory.
; The loaded values have different widths, and both loads are expected to
; remain scalar, in this order.
; CHECK-LABEL: @no_merge_load_i64_ptr32(
; CHECK: load i64,
; CHECK: load i8 addrspace(3)*,
define void @no_merge_load_i64_ptr32(i64 addrspace(1)* nocapture %a) #0 {
entry:
  ; Slot 1 of %a viewed as a slot holding an address-space-3 pointer.
  %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
  %a.1.cast = bitcast i64 addrspace(1)* %a.1 to i8 addrspace(3)* addrspace(1)*

  %ld.0 = load i64, i64 addrspace(1)* %a
  %ld.1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a.1.cast

  ret void
}

; Negative test: a load of a 32-bit (address space 3) pointer value from the
; start of %a followed by an i64 load from slot 1, both in address space 1
; memory. The loaded values have different widths, and both loads are
; expected to remain scalar, in this order.
; CHECK-LABEL: @no_merge_load_ptr32_i64(
; CHECK: load i8 addrspace(3)*,
; CHECK: load i64,
define void @no_merge_load_ptr32_i64(i64 addrspace(1)* nocapture %a) #0 {
entry:
  ; Start of %a viewed as a slot holding an address-space-3 pointer.
  %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(3)* addrspace(1)*
  %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1

  %ld.0 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a.cast
  %ld.1 = load i64, i64 addrspace(1)* %a.1

  ret void
}

; Two adjacent <2 x pointer> loads and two adjacent <2 x pointer> stores in
; address space 1.
; XXX - These accesses are not merged, for a reason that has not been
; determined; the patterns below pin the current behavior, in which both
; loads and both stores survive as separate <2 x i8 addrspace(1)*> accesses.
; CHECK-LABEL: @merge_v2p1i8_v2p1i8(
; CHECK: load <2 x i8 addrspace(1)*>
; CHECK: load <2 x i8 addrspace(1)*>
; CHECK: store <2 x i8 addrspace(1)*>
; CHECK: store <2 x i8 addrspace(1)*>
define void @merge_v2p1i8_v2p1i8(<2 x i8 addrspace(1)*> addrspace(1)* nocapture noalias %a, <2 x i8 addrspace(1)*> addrspace(1)* nocapture readonly noalias %b) #0 {
entry:
  ; Second vector-sized slot of each buffer.
  %a.1 = getelementptr inbounds <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %a, i64 1
  %b.1 = getelementptr inbounds <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b, i64 1

  %ld.c = load <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b, align 4
  %ld.c.idx.1 = load <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b.1, align 4

  store <2 x i8 addrspace(1)*> zeroinitializer, <2 x i8 addrspace(1)*> addrspace(1)* %a, align 4
  store <2 x i8 addrspace(1)*> zeroinitializer, <2 x i8 addrspace(1)*> addrspace(1)* %a.1, align 4
  ret void
}

; Load of a 64-bit pointer at slot 0 followed by a load of a double at
; slot 1. Expected result: a single <2 x i64> load; lane 0 is converted to
; the pointer type with inttoptr and lane 1 is bitcast to double.
; CHECK-LABEL: @merge_load_ptr64_f64(
; CHECK: load <2 x i64>
; CHECK: [[ELT0:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 0
; CHECK: [[ELT0_INT:%[0-9]+]] = inttoptr i64 [[ELT0]] to i8 addrspace(1)*
; CHECK: [[ELT1_INT:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 1
; CHECK: bitcast i64 [[ELT1_INT]] to double
define void @merge_load_ptr64_f64(double addrspace(1)* nocapture %a) #0 {
entry:
  ; Slot 0 of %a, reinterpreted as a slot holding a pointer.
  %a.cast = bitcast double addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
  %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1

  %ld.0 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.cast
  %ld.1 = load double, double addrspace(1)* %a.1

  ret void
}

; Load of a double at slot 0 followed by a load of a 64-bit pointer at
; slot 1. Expected result: a single <2 x i64> load; lane 0 is bitcast to
; double and lane 1 is converted to the pointer type with inttoptr.
; CHECK-LABEL: @merge_load_f64_ptr64(
; CHECK: load <2 x i64>
; CHECK: [[ELT0:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 0
; CHECK: bitcast i64 [[ELT0]] to double
; CHECK: [[ELT1:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 1
; CHECK: inttoptr i64 [[ELT1]] to i8 addrspace(1)*
define void @merge_load_f64_ptr64(double addrspace(1)* nocapture %a) #0 {
entry:
  ; Slot 1 of %a, reinterpreted as a slot holding a pointer.
  %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1
  %a.1.cast = bitcast double addrspace(1)* %a.1 to i8 addrspace(1)* addrspace(1)*

  %ld.0 = load double, double addrspace(1)* %a
  %ld.1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.1.cast

  ret void
}

; Store of a 64-bit pointer at slot 0 followed by a store of a double at
; slot 1. Expected result: %ptr0 goes through ptrtoint into lane 0, %val1 is
; bitcast to i64 into lane 1, and the pair is written with one <2 x i64>
; store.
; CHECK-LABEL: @merge_store_ptr64_f64(
; CHECK: [[ELT0_INT:%[0-9]+]] = ptrtoint i8 addrspace(1)* %ptr0 to i64
; CHECK: insertelement <2 x i64> undef, i64 [[ELT0_INT]], i32 0
; CHECK: [[ELT1_INT:%[0-9]+]] = bitcast double %val1 to i64
; CHECK: insertelement <2 x i64> %{{[0-9]+}}, i64 [[ELT1_INT]], i32 1
; CHECK: store <2 x i64>
define void @merge_store_ptr64_f64(double addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, double %val1) #0 {
entry:
  ; Slot 0 of %a, reinterpreted as a slot holding a pointer.
  %a.cast = bitcast double addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
  %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1

  store i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* %a.cast
  store double %val1, double addrspace(1)* %a.1

  ret void
}

; Store of a double at slot 0 followed by a store of a 64-bit pointer at
; slot 1. Expected result: %val0 is bitcast to i64 into lane 0, %ptr1 goes
; through ptrtoint into lane 1, and the pair is written with one <2 x i64>
; store.
; CHECK-LABEL: @merge_store_f64_ptr64(
; CHECK: [[ELT0_INT:%[0-9]+]] = bitcast double %val0 to i64
; CHECK: insertelement <2 x i64> undef, i64 [[ELT0_INT]], i32 0
; CHECK: [[ELT1_INT:%[0-9]+]] = ptrtoint i8 addrspace(1)* %ptr1 to i64
; CHECK: insertelement <2 x i64> %{{[0-9]+}}, i64 [[ELT1_INT]], i32 1
; CHECK: store <2 x i64>
define void @merge_store_f64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, double %val0, i8 addrspace(1)* %ptr1) #0 {
entry:
  %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1
  ; Slot 0 of %a, reinterpreted as a slot holding a double.
  %a.cast = bitcast i8 addrspace(1)* addrspace(1)* %a to double addrspace(1)*

  store double %val0, double addrspace(1)* %a.cast
  store i8 addrspace(1)* %ptr1, i8 addrspace(1)* addrspace(1)* %a.1

  ret void
}

; Attribute groups: #0 is attached to every test function defined in this
; file, #1 to the workitem-id intrinsic declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }