; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s
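; The amdgpu-annotate-kernel-features pass should infer per-function
; "amdgpu-*" attributes (checked at the bottom of this file) from the
; intrinsics and address space operations each function uses.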

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0

declare i1 @llvm.amdgcn.is.shared(i8* nocapture) #2
declare i1 @llvm.amdgcn.is.private(i8* nocapture) #2

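; Using a workgroup or workitem ID intrinsic should add the matching
; "amdgpu-work-group-id-*" / "amdgpu-work-item-id-*" attribute for that
; dimension. The x dimension is enabled by default, so kernels using only
; it keep the plain nounwind group (#1).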
; HSA: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 {
define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 {
define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 {
define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #5 {
define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #6 {
define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 {
define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 {
define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #9 {
define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  %val3 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val4 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val5 = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %val0, i32 addrspace(1)* %ptr
  store volatile i32 %val1, i32 addrspace(1)* %ptr
  store volatile i32 %val2, i32 addrspace(1)* %ptr
  store volatile i32 %val3, i32 addrspace(1)* %ptr
  store volatile i32 %val4, i32 addrspace(1)* %ptr
  store volatile i32 %val5, i32 addrspace(1)* %ptr
  ret void
}

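; The dispatch, queue, and kernarg segment pointer intrinsics should each map
; to a dedicated attribute: "amdgpu-dispatch-ptr", "amdgpu-queue-ptr", and
; "amdgpu-kernarg-segment-ptr" respectively.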
; HSA: define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #10 {
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 {
define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 {
  %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
  %bc = bitcast i8 addrspace(4)* %queue.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

; HSA: define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #12 {
define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 {
  %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  %bc = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
  %val = load i32, i32 addrspace(4)* %bc
  store i32 %val, i32 addrspace(1)* %ptr
  ret void
}

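; Group-to-flat and private-to-flat casts need the aperture bases, which are
; read via the queue pointer on HSA, so they should imply "amdgpu-queue-ptr".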
; HSA: define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 {
define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #11 {
define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

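; Casts from flat back to a specific address space need no aperture access,
; so no queue pointer annotation is expected here.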
; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
  store volatile i32 0, i32 addrspace(3)* %ftos
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
  store volatile i32 0, i32 addrspace(5)* %ftos
  ret void
}

; No-op addrspacecast should not use queue ptr
; HSA: define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(1)* %ptr to i32*
  store volatile i32 0, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 {
  %stof = addrspacecast i32 addrspace(4)* %ptr to i32*
  %ld = load volatile i32, i32* %stof
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
  store volatile i32 0, i32 addrspace(1)* %ftos
  ret void
}

; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 {
  %ftos = addrspacecast i32* %ptr to i32 addrspace(4)*
  %ld = load volatile i32, i32 addrspace(4)* %ftos
  ret void
}

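; llvm.amdgcn.is.shared and llvm.amdgcn.is.private also compare a pointer
; against the apertures, so they should likewise imply "amdgpu-queue-ptr".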
; HSA: define amdgpu_kernel void @use_is_shared(i8* %ptr) #11 {
define amdgpu_kernel void @use_is_shared(i8* %ptr) #1 {
  %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
  %ext = zext i1 %is.shared to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}

; HSA: define amdgpu_kernel void @use_is_private(i8* %ptr) #11 {
define amdgpu_kernel void @use_is_private(i8* %ptr) #1 {
  %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
  %ext = zext i1 %is.private to i32
  store i32 %ext, i32 addrspace(1)* undef
  ret void
}

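; Any alloca creates a stack object and should add "amdgpu-stack-objects",
; whether it is in the entry block, a non-entry block, or a non-kernel function.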
; HSA: define amdgpu_kernel void @use_alloca() #13 {
define amdgpu_kernel void @use_alloca() #1 {
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

; HSA: define amdgpu_kernel void @use_alloca_non_entry_block() #13 {
define amdgpu_kernel void @use_alloca_non_entry_block() #1 {
entry:
  br label %bb

bb:
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

; HSA: define void @use_alloca_func() #13 {
define void @use_alloca_func() #1 {
  %alloca = alloca i32, addrspace(5)
  store i32 0, i32 addrspace(5)* %alloca
  ret void
}

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }
attributes #2 = { nounwind readnone speculatable }

; HSA: attributes #0 = { nounwind readnone speculatable willreturn }
; HSA: attributes #1 = { nounwind }
; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" }
; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" }
; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" }
; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" }
; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }
; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" }
; HSA: attributes #12 = { nounwind "amdgpu-kernarg-segment-ptr" }
; HSA: attributes #13 = { nounwind "amdgpu-stack-objects" }