; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Global (addrspace 1) i32 load at a constant index of 7 elements from %in.
; The address is formed in the entry block but its only user is the load in
; %do.load, which is skipped when the mbcnt.lo result is 0.
; OPT-LABEL: @test_sink_global_small_offset_i32(
; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
; OPT: br i1
; OPT-CI: ptrtoint

; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
; GCN: {{^}}BB0_2:
define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %out.ptr = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.ptr = getelementptr i32, i32 addrspace(1)* %in, i64 7
  %mbcnt = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %skip.load = icmp eq i32 %mbcnt, 0
  br i1 %skip.load, label %merge, label %do.load

do.load:
  %loaded = load i32, i32 addrspace(1)* %in.ptr
  br label %merge

merge:
  ; 0 is stored when the load was skipped.
  %result = phi i32 [ %loaded, %do.load ], [ 0, %entry ]
  store i32 %result, i32 addrspace(1)* %out.ptr
  br label %exit

exit:
  ret void
}

; Global (addrspace 1) i8 load at the constant byte offset 65535 from %in.
; The opt check requires the getelementptr, under its original name %in.gep,
; to still appear before the conditional branch. The llc check expects the
; load line to end in a scalar register operand with no immediate offset field.
; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset(
; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: {{^}}BB1_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %out.ptr = getelementptr i32, i32 addrspace(1)* %out, i64 99999
  ; %in.gep is matched by name in the opt check above; do not rename it.
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
  %mbcnt = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %skip.load = icmp eq i32 %mbcnt, 0
  br i1 %skip.load, label %merge, label %do.load

do.load:
  %byte = load i8, i8 addrspace(1)* %in.gep
  %byte.sext = sext i8 %byte to i32
  br label %merge

merge:
  ; 0 is stored when the load was skipped.
  %result = phi i32 [ %byte.sext, %do.load ], [ 0, %entry ]
  store i32 %result, i32 addrspace(1)* %out.ptr
  br label %exit

exit:
  ret void
}

; Global (addrspace 1) i8 load at the constant byte offset 4095 from %in.
; The llc check expects that constant to appear as the immediate offset field
; of the load (offset 4095) with a literal 0 as the preceding operand.
; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
; GCN: {{^}}BB2_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %out.ptr = getelementptr i32, i32 addrspace(1)* %out, i32 1024
  %in.ptr = getelementptr i8, i8 addrspace(1)* %in, i64 4095
  %mbcnt = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %skip.load = icmp eq i32 %mbcnt, 0
  br i1 %skip.load, label %merge, label %do.load

do.load:
  %byte = load i8, i8 addrspace(1)* %in.ptr
  %byte.sext = sext i8 %byte to i32
  br label %merge

merge:
  ; 0 is stored when the load was skipped.
  %result = phi i32 [ %byte.sext, %do.load ], [ 0, %entry ]
  store i32 %result, i32 addrspace(1)* %out.ptr
  br label %exit

exit:
  ret void
}

; Global (addrspace 1) i8 load at the constant byte offset 4096 from %in, one
; past the 4095 used by the preceding test. The llc check expects the load line
; to end in a scalar register operand with no immediate offset field.
; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: {{^}}BB3_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %out.ptr = getelementptr i32, i32 addrspace(1)* %out, i64 99999
  %in.ptr = getelementptr i8, i8 addrspace(1)* %in, i64 4096
  %mbcnt = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %skip.load = icmp eq i32 %mbcnt, 0
  br i1 %skip.load, label %merge, label %do.load

do.load:
  %byte = load i8, i8 addrspace(1)* %in.ptr
  %byte.sext = sext i8 %byte to i32
  br label %merge

merge:
  ; 0 is stored when the load was skipped.
  %result = phi i32 [ %byte.sext, %do.load ], [ 0, %entry ]
  store i32 %result, i32 addrspace(1)* %out.ptr
  br label %exit

exit:
  ret void
}

; Volatile store and loads through a pointer into a stack alloca at constant
; element index 1023 (1023 * 4 bytes = 4092, the offset the llc checks expect
; on the store and the load). The opt checks require that no
; "getelementptr [512 x i32]" appears before the conditional branch and that a
; ptrtoint appears after it.
; OPT-LABEL: @test_sink_scratch_small_offset_i32(
; OPT-NOT:  getelementptr [512 x i32]
; OPT: br i1
; OPT: ptrtoint

; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
; GCN: {{^}}BB4_2:
define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
  %alloca = alloca [512 x i32], align 4
  %out.ptr.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
  %out.ptr.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  ; This add has no users in the function.
  %arg.plus8 = add i32 %arg, 8
  %slot = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023
  %mbcnt = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %skip.access = icmp eq i32 %mbcnt, 0
  br i1 %skip.access, label %merge, label %access

access:
  store volatile i32 123, i32* %slot
  %reloaded = load volatile i32, i32* %slot
  br label %merge

merge:
  ; 0 is stored when the conditional block was skipped.
  %result = phi i32 [ %reloaded, %access ], [ 0, %entry ]
  store i32 %result, i32 addrspace(1)* %out.ptr.0
  %final = load volatile i32, i32* %slot
  store i32 %final, i32 addrspace(1)* %out.ptr.1
  br label %exit

exit:
  ret void
}

; Same shape as the scratch test with index 1023, but the alloca element index
; is 1024 (1024 * 4 bytes = 4096). The opt checks require the getelementptr to
; remain before the conditional branch under its original names and that no
; ptrtoint follows the branch; the llc checks expect the store and load lines
; to end at "offen" with no immediate offset field.
; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
; OPT: br i1
; OPT-NOT: ptrtoint

; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: {{^}}BB5_2:
define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
  ; %alloca and %alloca.gep are matched by name in the opt check above.
  %alloca = alloca [512 x i32], align 4
  %out.ptr.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
  %out.ptr.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  ; This add has no users in the function.
  %arg.plus8 = add i32 %arg, 8
  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
  %mbcnt = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %skip.access = icmp eq i32 %mbcnt, 0
  br i1 %skip.access, label %merge, label %access

access:
  store volatile i32 123, i32* %alloca.gep
  %reloaded = load volatile i32, i32* %alloca.gep
  br label %merge

merge:
  ; 0 is stored when the conditional block was skipped.
  %result = phi i32 [ %reloaded, %access ], [ 0, %entry ]
  store i32 %result, i32 addrspace(1)* %out.ptr.0
  %final = load volatile i32, i32* %alloca.gep
  store i32 %final, i32 addrspace(1)* %out.ptr.1
  br label %exit

exit:
  ret void
}

; Global (addrspace 1) i32 load whose element index is the zero-extended
; %offset argument rather than a constant. The CI check expects an addr64
; buffer load with no immediate offset; the VI check expects a flat load.
; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
; GCN: s_and_saveexec_b64
; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN: {{^}}BB6_2:
define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
entry:
  %index = zext i32 %offset to i64
  %out.ptr = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
  %mbcnt = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %skip.load = icmp eq i32 %mbcnt, 0
  br i1 %skip.load, label %merge, label %do.load

do.load:
  %loaded = load i32, i32 addrspace(1)* %in.ptr
  br label %merge

merge:
  ; 0 is stored when the load was skipped.
  %result = phi i32 [ %loaded, %do.load ], [ 0, %entry ]
  store i32 %result, i32 addrspace(1)* %out.ptr
  br label %exit

exit:
  ret void
}

; Attribute group #0 is the one attached to the @llvm.amdgcn.mbcnt.lo call
; sites in this file.
attributes #0 = { nounwind readnone }
; Attribute group #1 is not referenced by any line visible in this file.
attributes #1 = { nounwind }



; addrspace(2) i32 load at a constant index of 7 elements from %in. The opt
; checks require that no addrspace(2) getelementptr appears before the
; conditional branch; the SI check expects 0x7 as the load's offset operand.
; OPT-LABEL: @test_sink_constant_small_offset_i32
; OPT-NOT:  getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.ptr = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.ptr = getelementptr i32, i32 addrspace(2)* %in, i64 7
  %mbcnt = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %skip.load = icmp eq i32 %mbcnt, 0
  br i1 %skip.load, label %merge, label %do.load

do.load:
  %loaded = load i32, i32 addrspace(2)* %in.ptr
  br label %merge

merge:
  ; 0 is stored when the load was skipped.
  %result = phi i32 [ %loaded, %do.load ], [ 0, %entry ]
  store i32 %result, i32 addrspace(1)* %out.ptr
  br label %exit

exit:
  ret void
}

; addrspace(2) i32 load at a constant index of 255 elements from %in. The opt
; checks require that no addrspace(2) getelementptr appears before the
; conditional branch; the SI check expects 0xff as the load's offset operand.
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32
; OPT-NOT:  getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.ptr = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.ptr = getelementptr i32, i32 addrspace(2)* %in, i64 255
  %mbcnt = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %skip.load = icmp eq i32 %mbcnt, 0
  br i1 %skip.load, label %merge, label %do.load

do.load:
  %loaded = load i32, i32 addrspace(2)* %in.ptr
  br label %merge

merge:
  ; 0 is stored when the load was skipped.
  %result = phi i32 [ %loaded, %do.load ], [ 0, %entry ]
  store i32 %result, i32 addrspace(1)* %out.ptr
  br label %exit

exit:
  ret void
}

; addrspace(2) i32 load at a constant index of 256 elements from %in
; (256 * 4 bytes = 0x400). The opt checks differ per target: the tahiti run
; expects the getelementptr to remain before the branch, the bonaire and tonga
; runs expect it to be absent. The SI llc checks expect 0x400 to be moved into
; a scalar register that is then used as the load's offset operand.
; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32
; OPT-SI:  getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT:  getelementptr i32, i32 addrspace(2)*
; OPT-VI-NOT:  getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
; GCN: s_and_saveexec_b64
; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400

; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.ptr = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.ptr = getelementptr i32, i32 addrspace(2)* %in, i64 256
  %mbcnt = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %skip.load = icmp eq i32 %mbcnt, 0
  br i1 %skip.load, label %merge, label %do.load

do.load:
  %loaded = load i32, i32 addrspace(2)* %in.ptr
  br label %merge

merge:
  ; 0 is stored when the load was skipped.
  %result = phi i32 [ %loaded, %do.load ], [ 0, %entry ]
  store i32 %result, i32 addrspace(1)* %out.ptr
  br label %exit

exit:
  ret void
}

; addrspace(2) i32 load at a constant index of 4294967295 elements from %in
; (4294967295 * 4 bytes = 0x3FFFFFFFC). The llc checks expect that byte offset
; to be added to the base with a scalar add of -4 followed by an
; add-with-carry of 3, and the SI load to then use a 0x0 offset operand.
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.ptr = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.ptr = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
  %mbcnt = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %skip.load = icmp eq i32 %mbcnt, 0
  br i1 %skip.load, label %merge, label %do.load

do.load:
  %loaded = load i32, i32 addrspace(2)* %in.ptr
  br label %merge

merge:
  ; 0 is stored when the load was skipped.
  %result = phi i32 [ %loaded, %do.load ], [ 0, %entry ]
  store i32 %result, i32 addrspace(1)* %out.ptr
  br label %exit

exit:
  ret void
}

; addrspace(2) i32 load at a constant index of 17179869181 elements from %in.
; The opt check expects the getelementptr to remain before the conditional
; branch on every target; the llc checks expect a scalar add / add-with-carry
; pair and, for SI, a load with a 0x0 offset operand.
; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32
; OPT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
; GCN: s_and_saveexec_b64
; GCN: s_add_u32
; GCN: s_addc_u32
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.ptr = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.ptr = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
  %mbcnt = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %skip.load = icmp eq i32 %mbcnt, 0
  br i1 %skip.load, label %merge, label %do.load

do.load:
  %loaded = load i32, i32 addrspace(2)* %in.ptr
  br label %merge

merge:
  ; 0 is stored when the load was skipped.
  %result = phi i32 [ %loaded, %do.load ], [ 0, %entry ]
  store i32 %result, i32 addrspace(1)* %out.ptr
  br label %exit

exit:
  ret void
}

; addrspace(2) i32 load at a constant index of 262143 elements from %in
; (262143 = 0x3ffff; 262143 * 4 bytes = 0xffffc). The llc checks expect the
; tahiti run to move 0xffffc into a scalar register used as the offset, the
; bonaire run to use 0x3ffff directly, and the tonga run to use 0xffffc
; directly.
; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}}
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}

; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.ptr = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.ptr = getelementptr i32, i32 addrspace(2)* %in, i64 262143
  %mbcnt = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %skip.load = icmp eq i32 %mbcnt, 0
  br i1 %skip.load, label %merge, label %do.load

do.load:
  %loaded = load i32, i32 addrspace(2)* %in.ptr
  br label %merge

merge:
  ; 0 is stored when the load was skipped.
  %result = phi i32 [ %loaded, %do.load ], [ 0, %entry ]
  store i32 %result, i32 addrspace(1)* %out.ptr
  br label %exit

exit:
  ret void
}

; addrspace(2) i32 load at a constant index of 262144 elements from %in
; (262144 = 0x40000; 262144 * 4 bytes = 0x100000). The opt checks expect the
; getelementptr to remain before the branch for the tahiti and tonga runs and
; to be absent for the bonaire run. The llc checks expect tahiti and tonga to
; move 0x100000 into a scalar register used as the offset, and bonaire to use
; 0x40000 directly.
; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-VI: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
; GCN: s_and_saveexec_b64
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}}

; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.ptr = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.ptr = getelementptr i32, i32 addrspace(2)* %in, i64 262144
  %mbcnt = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %skip.load = icmp eq i32 %mbcnt, 0
  br i1 %skip.load, label %merge, label %do.load

do.load:
  %loaded = load i32, i32 addrspace(2)* %in.ptr
  br label %merge

merge:
  ; 0 is stored when the load was skipped.
  %result = phi i32 [ %loaded, %do.load ], [ 0, %entry ]
  store i32 %result, i32 addrspace(1)* %out.ptr
  br label %exit

exit:
  ret void
}

; Two consecutive [3 x float] arrays; as a flat run of floats, member 1
; starts at float index 3.
%struct.foo = type { [3 x float], [3 x float] }

; Loads elements 0 and 2 of the second array member of an addrspace(3)
; %struct.foo (float indices 3 and 5, the offset0/offset1 values the llc check
; expects on a single two-element read). Both addresses are computed in the
; entry block and used only in %bb32. The opt check expects a ptrtoint of %ptr
; in the output.
; OPT-LABEL: @sink_ds_address(
; OPT: ptrtoint %struct.foo addrspace(3)* %ptr to i64

; The register-pair pattern in the last check line below must be
; [0-9]+:[0-9]+ (two digit runs separated by a colon); the earlier spelling
; had an unterminated first character class and matched only by accident.
; GCN-LABEL: {{^}}sink_ds_address:
; GCN: s_load_dword [[SREG1:s[0-9]+]],
; GCN: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[VREG1]] offset0:3 offset1:5
define void @sink_ds_address(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
entry:
  %x = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
  %y = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2
  br label %bb32

bb32:
  %a = load float, float addrspace(3)* %x, align 4
  %b = load float, float addrspace(3)* %y, align 4
  ; The comparison only exists to give both loads a user; both successors
  ; are unreachable.
  %cmp = fcmp one float %a, %b
  br i1 %cmp, label %bb34, label %bb33

bb33:
  unreachable

bb34:
  unreachable
}

; Intrinsic called in the entry block of the conditional-load tests in this
; file; its i32 result is compared against 0 to choose the branch.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0

; Attribute group used by the declaration above and by its call sites.
attributes #0 = { nounwind readnone }
