; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,CIVI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Callee that loads through the dispatch pointer; the CHECK lines expect the
; pointer to arrive in s[6:7] and be copied to a VGPR pair for the flat load.
; GCN-LABEL: {{^}}use_dispatch_ptr:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_dispatch_ptr() #1 {
  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; Kernel caller; expects the dispatch-ptr input to be enabled and forwarded
; from the kernel input SGPRs s[4:5] into the callee's s[6:7].
; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr:
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
  call void @use_dispatch_ptr()
  ret void
}

; Callee that loads through the queue pointer, expected in s[6:7].
; GCN-LABEL: {{^}}use_queue_ptr:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_queue_ptr() #1 {
  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; Kernel caller; queue-ptr input enabled and forwarded into s[6:7] for the call.
; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr:
; GCN: enable_sgpr_queue_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
  call void @use_queue_ptr()
  ret void
}

; Local-to-flat addrspacecast; on CI/VI the aperture comes from the queue ptr
; (flat load), while gfx9 reads it with s_getreg instead.
; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
; CIVI: flat_load_dword v[[HI:[0-9]+]], v[0:1]
; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]]
; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
; GFX9: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_queue_ptr_addrspacecast() #1 {
  %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32*
  store volatile i32 0, i32* %asc
  ret void
}

; Kernel caller; only CI/VI need the queue ptr (for the aperture), so gfx9
; must not emit the s_mov_b64 forwarding copy.
; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast:
; CIVI: enable_sgpr_queue_ptr = 1

; CIVI: s_mov_b64 s[6:7], s[4:5]
; GFX9-NOT: s_mov_b64
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
  call void @use_queue_ptr_addrspacecast()
  ret void
}

; Callee that loads through the kernarg segment pointer, expected in s[6:7].
; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_kernarg_segment_ptr() #1 {
  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; Kernel caller; kernarg-segment-ptr input enabled and forwarded to s[6:7].
; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 {
  call void @use_kernarg_segment_ptr()
  ret void
}

; Callee that consumes the 64-bit dispatch id via inline asm; expected in s[6:7].
; GCN-LABEL: {{^}}use_dispatch_id:
; GCN: ; use s[6:7]
define void @use_dispatch_id() #1 {
  %id = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %id)
  ret void
}

; No kernarg segment so that there is a mov to check. With kernarg
; pointer enabled, it happens to end up in the right place anyway.

; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id:
; GCN: enable_sgpr_dispatch_id = 1

; GCN: s_mov_b64 s[6:7], s[4:5]
define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 {
  call void @use_dispatch_id()
  ret void
}

; Callee consuming workgroup id x, expected in s6.
; GCN-LABEL: {{^}}use_workgroup_id_x:
; GCN: s_waitcnt
; GCN: ; use s6
define void @use_workgroup_id_x() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Same as use_workgroup_id_x but with a stack object, so the frame setup
; (s5 = s32, buffer_store to the frame) is also checked.
; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
; GCN: s_waitcnt
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v0, off, s[0:3], s5 offset:4
; GCN: ; use s6
; GCN: s_setpc_b64
define void @use_stack_workgroup_id_x() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Callee consuming workgroup id y, expected in s6 (first free user SGPR).
; GCN-LABEL: {{^}}use_workgroup_id_y:
; GCN: s_waitcnt
; GCN: ; use s6
define void @use_workgroup_id_y() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Callee consuming workgroup id z, expected in s6.
; GCN-LABEL: {{^}}use_workgroup_id_z:
; GCN: s_waitcnt
; GCN: ; use s6
define void @use_workgroup_id_z() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Callee consuming workgroup ids x and y, expected in s6 and s7.
; GCN-LABEL: {{^}}use_workgroup_id_xy:
; GCN: ; use s6
; GCN: ; use s7
define void @use_workgroup_id_xy() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; Callee consuming all three workgroup ids, expected in s6, s7, s8.
; GCN-LABEL: {{^}}use_workgroup_id_xyz:
; GCN: ; use s6
; GCN: ; use s7
; GCN: ; use s8
define void @use_workgroup_id_xyz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  call void asm sideeffect "; use $0", "s"(i32 %val2)
  ret void
}

; Callee consuming workgroup ids x and z, packed into s6 and s7.
; GCN-LABEL: {{^}}use_workgroup_id_xz:
; GCN: ; use s6
; GCN: ; use s7
define void @use_workgroup_id_xz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; Callee consuming workgroup ids y and z, packed into s6 and s7.
; GCN-LABEL: {{^}}use_workgroup_id_yz:
; GCN: ; use s6
; GCN: ; use s7
define void @use_workgroup_id_yz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; Kernel caller; only workgroup id x is enabled, and s6 (where the callee
; expects it) must not be overwritten while setting up the call.
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-NOT: s6
; GCN: s_mov_b32 s33, s7
; GCN-NOT: s6
; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; Kernel caller; id y arrives in s7 and must be shuffled down into s6.
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN: s_mov_b32 s33, s8
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; Kernel caller; id z arrives in s7 (y disabled) and is moved into s6.
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; Kernel caller; ids x and y already sit in s6/s7, so no copies of either
; register are allowed during call setup.
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN: s_mov_b32 s33, s8
; GCN-NOT: s6
; GCN-NOT: s7
; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN-NOT: s7
; GCN: s_mov_b32 s32, s33
; GCN-NOT: s6
; GCN-NOT: s7
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 {
  call void @use_workgroup_id_xy()
  ret void
}

; Kernel caller; all three ids already sit in s6/s7/s8, so none of those
; registers may be touched while setting up the call.
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s9

; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8

; GCN: s_mov_b32 s4, s33

; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8

; GCN: s_mov_b32 s32, s33

; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8

; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 {
  call void @use_workgroup_id_xyz()
  ret void
}

; Kernel caller; with y disabled, x and z are already packed in s6/s7 and
; must be left untouched during call setup.
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN-NOT: s6
; GCN-NOT: s7

; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN-NOT: s7

; GCN: s_mov_b32 s32, s33
; GCN-NOT: s6
; GCN-NOT: s7

; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 {
  call void @use_workgroup_id_xz()
  ret void
}

; Kernel caller; ids y and z arrive in s7/s8 and are shuffled down to s6/s7.
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s9
; GCN: s_mov_b32 s6, s7
; GCN: s_mov_b32 s4, s33
; GCN: s_mov_b32 s7, s8
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
  call void @use_workgroup_id_yz()
  ret void
}

; Argument is in right place already
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
; GCN-NOT: s6
define void @func_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; Function-to-function call; the id y argument is already in place (s6),
; so no copy may be emitted.
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
; GCN-NOT: s6
define void @func_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; Function-to-function call; the id z argument is already in place (s6).
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
; GCN-NOT: s6
define void @func_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; Callee with a regular VGPR argument (v0) plus workgroup id x in s6.
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s6
define void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Callee with a regular VGPR argument (v0) plus workgroup id y in s6.
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s6
define void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Callee with a regular VGPR argument (v0) plus workgroup id z in s6.
; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s6
define void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; Kernel caller passing an immediate VGPR argument (0x22b = 555) while
; leaving the id x input in s6 untouched.
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-DAG: s_mov_b32 s33, s7
; GCN-DAG: v_mov_b32_e32 v0, 0x22b

; GCN-NOT: s6
; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 {
  call void @other_arg_use_workgroup_id_x(i32 555)
  ret void
}

; Kernel caller passing an immediate VGPR argument plus id y moved s7 -> s6.
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-DAG: s_mov_b32 s33, s8
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 {
  call void @other_arg_use_workgroup_id_y(i32 555)
  ret void
}

; Kernel caller passing an immediate VGPR argument plus id z moved s7 -> s6.
; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7

; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
  call void @other_arg_use_workgroup_id_z(i32 555)
  ret void
}

; Callee consuming every special SGPR input at once: dispatch ptr (s[6:7]),
; queue ptr (s[8:9]), kernarg segment ptr (s[10:11]), dispatch id (s[12:13]),
; and the three workgroup ids (s14, s15, s16), plus a stack object.
; GCN-LABEL: {{^}}use_every_sgpr_input:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s8
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s9
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s10
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s11
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: ; use s[12:13]
; GCN: ; use s14
; GCN: ; use s15
; GCN: ; use s16
define void @use_every_sgpr_input() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}

; Kernel caller; every special SGPR input must be enabled and the whole set
; shifted up by two registers (past the flat-scratch init) for the callee.
; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1
; GCN: enable_sgpr_workgroup_info = 0

; GCN: enable_sgpr_private_segment_buffer = 1
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: enable_sgpr_queue_ptr = 1
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; GCN: enable_sgpr_dispatch_id = 1
; GCN: enable_sgpr_flat_scratch_init = 1

; GCN: s_mov_b32 s33, s17
; GCN: s_mov_b64 s[12:13], s[10:11]
; GCN: s_mov_b64 s[10:11], s[8:9]
; GCN: s_mov_b64 s[8:9], s[6:7]
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_mov_b32 s4, s33
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}

; Function-to-function call; all inputs are already in their expected
; registers, so no SGPR copies are allowed.
; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input:
; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8
; GCN-NOT: s9
; GCN-NOT: s10
; GCN-NOT: s11
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s[6:7]
; GCN-NOT: s[8:9]
; GCN-NOT: s[10:11]
; GCN-NOT: s[12:13]
define void @func_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}

; Function that uses every SGPR input itself and then calls a callee that
; only needs the workgroup ids, which must be moved from s14/s15/s16 down
; into the callee's s6/s7/s8.
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
; GCN-DAG: s_mov_b32 s6, s14
; GCN-DAG: s_mov_b32 s7, s15
; GCN-DAG: s_mov_b32 s8, s16
; GCN: s_swappc_b64
define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  call void @use_workgroup_id_xyz()
  ret void
}

; Same as above but the SGPR inputs are used AFTER the call, so they must be
; saved into call-preserved SGPRs across the s_swappc_b64 and read back from
; those saved copies afterwards.
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill:
; GCN: s_mov_b32 s5, s32
; GCN: s_add_u32 s32, s32, 0x400

; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s14
; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-68-9][0-9]*]], s15
; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-79][0-9]*]], s16
; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[6:7]
; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[8:9]
; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[10:11]

; GCN-DAG: s_mov_b32 s6, s14
; GCN-DAG: s_mov_b32 s7, s15
; GCN-DAG: s_mov_b32 s8, s16

; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[6:7]
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[8:9]
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Z:[0-9]+]]{{\:}}[[HI_Z:[0-9]+]]{{\]}}, s[10:11]

; GCN: s_swappc_b64

; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_X]]
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_X]]
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_Y]]
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_Y]]
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_Z]]
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_Z]]
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: ; use
; GCN: ; use [[SAVE_X]]
; GCN: ; use [[SAVE_Y]]
; GCN: ; use [[SAVE_Z]]
define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  call void @use_workgroup_id_xyz()

  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}

; Intrinsic declarations and shared attribute groups.
declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }
