• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
4
; External amdgpu_gfx callee used by most tests below. Only the declaration is
; present here, so its body is not available when the callers are compiled.
5declare hidden amdgpu_gfx void @external_void_func_void() #0
6
; Makes two calls to @external_void_func_void separated by an empty
; side-effecting inline asm. Both s_swappc_b64 instructions in the expected
; assembly target s[30:31], so the incoming s30/s31 are parked in v40 lanes 2
; and 3 and the function returns through s[4:5]. The callee address is built
; once in s[34:35] and reused for the second call.
7define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
8; GFX9-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
9; GFX9:       ; %bb.0:
10; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
12; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
13; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
14; GFX9-NEXT:    v_writelane_b32 v40, s33, 4
15; GFX9-NEXT:    v_writelane_b32 v40, s34, 0
16; GFX9-NEXT:    v_writelane_b32 v40, s35, 1
17; GFX9-NEXT:    v_writelane_b32 v40, s30, 2
18; GFX9-NEXT:    s_mov_b32 s33, s32
19; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
20; GFX9-NEXT:    s_getpc_b64 s[34:35]
21; GFX9-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
22; GFX9-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
23; GFX9-NEXT:    v_writelane_b32 v40, s31, 3
24; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
25; GFX9-NEXT:    ;;#ASMSTART
26; GFX9-NEXT:    ;;#ASMEND
27; GFX9-NEXT:    s_swappc_b64 s[30:31], s[34:35]
28; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
29; GFX9-NEXT:    v_readlane_b32 s5, v40, 3
30; GFX9-NEXT:    v_readlane_b32 s35, v40, 1
31; GFX9-NEXT:    v_readlane_b32 s34, v40, 0
32; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
33; GFX9-NEXT:    v_readlane_b32 s33, v40, 4
34; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
35; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
36; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
37; GFX9-NEXT:    s_waitcnt vmcnt(0)
38; GFX9-NEXT:    s_setpc_b64 s[4:5]
39;
40; GFX10-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
41; GFX10:       ; %bb.0:
42; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
44; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
45; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
46; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
47; GFX10-NEXT:    s_mov_b32 exec_lo, s4
48; GFX10-NEXT:    v_writelane_b32 v40, s33, 4
49; GFX10-NEXT:    s_mov_b32 s33, s32
50; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
51; GFX10-NEXT:    v_writelane_b32 v40, s34, 0
52; GFX10-NEXT:    v_writelane_b32 v40, s35, 1
53; GFX10-NEXT:    s_getpc_b64 s[34:35]
54; GFX10-NEXT:    s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
55; GFX10-NEXT:    s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
56; GFX10-NEXT:    v_writelane_b32 v40, s30, 2
57; GFX10-NEXT:    v_writelane_b32 v40, s31, 3
58; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
59; GFX10-NEXT:    ;;#ASMSTART
60; GFX10-NEXT:    ;;#ASMEND
61; GFX10-NEXT:    s_swappc_b64 s[30:31], s[34:35]
62; GFX10-NEXT:    v_readlane_b32 s4, v40, 2
63; GFX10-NEXT:    v_readlane_b32 s5, v40, 3
64; GFX10-NEXT:    v_readlane_b32 s35, v40, 1
65; GFX10-NEXT:    v_readlane_b32 s34, v40, 0
66; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
67; GFX10-NEXT:    v_readlane_b32 s33, v40, 4
68; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
69; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
70; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
71; GFX10-NEXT:    s_mov_b32 exec_lo, s6
72; GFX10-NEXT:    s_waitcnt vmcnt(0)
73; GFX10-NEXT:    s_setpc_b64 s[4:5]
74  call amdgpu_gfx void @external_void_func_void()
75  call void asm sideeffect "", ""() #0
76  call amdgpu_gfx void @external_void_func_void()
77  ret void
78}
79
; Function with no calls whose inline asm lists s[30:31] as clobbered. The
; expected assembly copies s[30:31] into s[4:5] before the asm and returns
; through s[4:5].
80define amdgpu_gfx void @void_func_void_clobber_s30_s31() #1 {
81; GFX9-LABEL: void_func_void_clobber_s30_s31:
82; GFX9:       ; %bb.0:
83; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
84; GFX9-NEXT:    s_mov_b64 s[4:5], s[30:31]
85; GFX9-NEXT:    ;;#ASMSTART
86; GFX9-NEXT:    ; clobber
87; GFX9-NEXT:    ;;#ASMEND
88; GFX9-NEXT:    s_setpc_b64 s[4:5]
89;
90; GFX10-LABEL: void_func_void_clobber_s30_s31:
91; GFX10:       ; %bb.0:
92; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
94; GFX10-NEXT:    s_mov_b64 s[4:5], s[30:31]
95; GFX10-NEXT:    ;;#ASMSTART
96; GFX10-NEXT:    ; clobber
97; GFX10-NEXT:    ;;#ASMEND
98; GFX10-NEXT:    s_setpc_b64 s[4:5]
99  call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
100  ret void
101}
102
; Defines a value in s31 via inline asm, calls @external_void_func_void, then
; consumes the value from s31. The expected assembly copies s31 into s34 before
; the call and back afterwards, saving the previous s34 in v40 lane 0.
; %out is not referenced by the body.
103define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1)* %out) #0 {
104; GFX9-LABEL: test_call_void_func_void_mayclobber_s31:
105; GFX9:       ; %bb.0:
106; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
107; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
108; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
109; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
110; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
111; GFX9-NEXT:    v_writelane_b32 v40, s34, 0
112; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
113; GFX9-NEXT:    s_mov_b32 s33, s32
114; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
115; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
116; GFX9-NEXT:    ;;#ASMSTART
117; GFX9-NEXT:    ; def s31
118; GFX9-NEXT:    ;;#ASMEND
119; GFX9-NEXT:    s_getpc_b64 s[4:5]
120; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
121; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
122; GFX9-NEXT:    s_mov_b32 s34, s31
123; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
124; GFX9-NEXT:    v_readlane_b32 s4, v40, 1
125; GFX9-NEXT:    s_mov_b32 s31, s34
126; GFX9-NEXT:    ;;#ASMSTART
127; GFX9-NEXT:    ; use s31
128; GFX9-NEXT:    ;;#ASMEND
129; GFX9-NEXT:    v_readlane_b32 s5, v40, 2
130; GFX9-NEXT:    v_readlane_b32 s34, v40, 0
131; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
132; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
133; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
134; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
135; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
136; GFX9-NEXT:    s_waitcnt vmcnt(0)
137; GFX9-NEXT:    s_setpc_b64 s[4:5]
138;
139; GFX10-LABEL: test_call_void_func_void_mayclobber_s31:
140; GFX10:       ; %bb.0:
141; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
143; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
144; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
145; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
146; GFX10-NEXT:    s_mov_b32 exec_lo, s4
147; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
148; GFX10-NEXT:    s_mov_b32 s33, s32
149; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
150; GFX10-NEXT:    s_getpc_b64 s[4:5]
151; GFX10-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
152; GFX10-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
153; GFX10-NEXT:    v_writelane_b32 v40, s34, 0
154; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
155; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
156; GFX10-NEXT:    ;;#ASMSTART
157; GFX10-NEXT:    ; def s31
158; GFX10-NEXT:    ;;#ASMEND
159; GFX10-NEXT:    s_mov_b32 s34, s31
160; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
161; GFX10-NEXT:    v_readlane_b32 s4, v40, 1
162; GFX10-NEXT:    s_mov_b32 s31, s34
163; GFX10-NEXT:    ;;#ASMSTART
164; GFX10-NEXT:    ; use s31
165; GFX10-NEXT:    ;;#ASMEND
166; GFX10-NEXT:    v_readlane_b32 s5, v40, 2
167; GFX10-NEXT:    v_readlane_b32 s34, v40, 0
168; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
169; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
170; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
171; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
172; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
173; GFX10-NEXT:    s_mov_b32 exec_lo, s6
174; GFX10-NEXT:    s_waitcnt vmcnt(0)
175; GFX10-NEXT:    s_setpc_b64 s[4:5]
176  %s31 = call i32 asm sideeffect "; def $0", "={s31}"()
177  call amdgpu_gfx void @external_void_func_void()
178  call void asm sideeffect "; use $0", "{s31}"(i32 %s31)
179  ret void
180}
181
; VGPR counterpart of the s31 test: a value is defined in v31, must stay live
; across the call, and is consumed from v31. The expected assembly moves it
; through v40 around the call; v40 is stored to and reloaded from the stack
; at s33, and v41 carries the s30/s31/s33 lanes. %out is not referenced.
182define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1)* %out) #0 {
183; GFX9-LABEL: test_call_void_func_void_mayclobber_v31:
184; GFX9:       ; %bb.0:
185; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
187; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
188; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
189; GFX9-NEXT:    v_writelane_b32 v41, s33, 2
190; GFX9-NEXT:    v_writelane_b32 v41, s30, 0
191; GFX9-NEXT:    s_mov_b32 s33, s32
192; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
193; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
194; GFX9-NEXT:    ;;#ASMSTART
195; GFX9-NEXT:    ; def v31
196; GFX9-NEXT:    ;;#ASMEND
197; GFX9-NEXT:    s_getpc_b64 s[4:5]
198; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
199; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
200; GFX9-NEXT:    v_writelane_b32 v41, s31, 1
201; GFX9-NEXT:    v_mov_b32_e32 v40, v31
202; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
203; GFX9-NEXT:    v_mov_b32_e32 v31, v40
204; GFX9-NEXT:    ;;#ASMSTART
205; GFX9-NEXT:    ; use v31
206; GFX9-NEXT:    ;;#ASMEND
207; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
208; GFX9-NEXT:    v_readlane_b32 s4, v41, 0
209; GFX9-NEXT:    v_readlane_b32 s5, v41, 1
210; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
211; GFX9-NEXT:    v_readlane_b32 s33, v41, 2
212; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
213; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
214; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
215; GFX9-NEXT:    s_waitcnt vmcnt(0)
216; GFX9-NEXT:    s_setpc_b64 s[4:5]
217;
218; GFX10-LABEL: test_call_void_func_void_mayclobber_v31:
219; GFX10:       ; %bb.0:
220; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
221; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
222; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
223; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
224; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
225; GFX10-NEXT:    s_mov_b32 exec_lo, s4
226; GFX10-NEXT:    v_writelane_b32 v41, s33, 2
227; GFX10-NEXT:    s_mov_b32 s33, s32
228; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
229; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
230; GFX10-NEXT:    ;;#ASMSTART
231; GFX10-NEXT:    ; def v31
232; GFX10-NEXT:    ;;#ASMEND
233; GFX10-NEXT:    v_writelane_b32 v41, s30, 0
234; GFX10-NEXT:    s_getpc_b64 s[4:5]
235; GFX10-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
236; GFX10-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
237; GFX10-NEXT:    v_mov_b32_e32 v40, v31
238; GFX10-NEXT:    v_writelane_b32 v41, s31, 1
239; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
240; GFX10-NEXT:    v_mov_b32_e32 v31, v40
241; GFX10-NEXT:    ;;#ASMSTART
242; GFX10-NEXT:    ; use v31
243; GFX10-NEXT:    ;;#ASMEND
244; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
245; GFX10-NEXT:    v_readlane_b32 s4, v41, 0
246; GFX10-NEXT:    v_readlane_b32 s5, v41, 1
247; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
248; GFX10-NEXT:    v_readlane_b32 s33, v41, 2
249; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
250; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
251; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
252; GFX10-NEXT:    s_mov_b32 exec_lo, s6
253; GFX10-NEXT:    s_waitcnt vmcnt(0)
254; GFX10-NEXT:    s_setpc_b64 s[4:5]
255  %v31 = call i32 asm sideeffect "; def $0", "={v31}"()
256  call amdgpu_gfx void @external_void_func_void()
257  call void asm sideeffect "; use $0", "{v31}"(i32 %v31)
258  ret void
259}
260
261
; Defines a value in s33 via inline asm, calls @external_void_func_void, then
; consumes it from s33. Unlike the s31 case, the expected assembly places no
; copy around the call: "; def s33" and "; use s33" bracket s_swappc_b64
; directly. The value of s33 after the function's own s_mov_b32 s33, s32 is
; saved in v40 lane 0 and the incoming s33 in lane 3; both are read back
; before returning. %out is not referenced.
262define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* %out) #0 {
263; GFX9-LABEL: test_call_void_func_void_preserves_s33:
264; GFX9:       ; %bb.0:
265; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
267; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
268; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
269; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
270; GFX9-NEXT:    s_mov_b32 s33, s32
271; GFX9-NEXT:    v_writelane_b32 v40, s33, 0
272; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
273; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
274; GFX9-NEXT:    s_getpc_b64 s[4:5]
275; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
276; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
277; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
278; GFX9-NEXT:    ;;#ASMSTART
279; GFX9-NEXT:    ; def s33
280; GFX9-NEXT:    ;;#ASMEND
281; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
282; GFX9-NEXT:    ;;#ASMSTART
283; GFX9-NEXT:    ; use s33
284; GFX9-NEXT:    ;;#ASMEND
285; GFX9-NEXT:    v_readlane_b32 s4, v40, 1
286; GFX9-NEXT:    v_readlane_b32 s33, v40, 0
287; GFX9-NEXT:    v_readlane_b32 s5, v40, 2
288; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
289; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
290; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
291; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
292; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
293; GFX9-NEXT:    s_waitcnt vmcnt(0)
294; GFX9-NEXT:    s_setpc_b64 s[4:5]
295;
296; GFX10-LABEL: test_call_void_func_void_preserves_s33:
297; GFX10:       ; %bb.0:
298; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
300; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
301; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
302; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
303; GFX10-NEXT:    s_mov_b32 exec_lo, s4
304; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
305; GFX10-NEXT:    s_mov_b32 s33, s32
306; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
307; GFX10-NEXT:    s_getpc_b64 s[4:5]
308; GFX10-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
309; GFX10-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
310; GFX10-NEXT:    v_writelane_b32 v40, s33, 0
311; GFX10-NEXT:    ;;#ASMSTART
312; GFX10-NEXT:    ; def s33
313; GFX10-NEXT:    ;;#ASMEND
314; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
315; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
316; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
317; GFX10-NEXT:    ;;#ASMSTART
318; GFX10-NEXT:    ; use s33
319; GFX10-NEXT:    ;;#ASMEND
320; GFX10-NEXT:    v_readlane_b32 s4, v40, 1
321; GFX10-NEXT:    v_readlane_b32 s33, v40, 0
322; GFX10-NEXT:    v_readlane_b32 s5, v40, 2
323; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
324; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
325; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
326; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
327; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
328; GFX10-NEXT:    s_mov_b32 exec_lo, s6
329; GFX10-NEXT:    s_waitcnt vmcnt(0)
330; GFX10-NEXT:    s_setpc_b64 s[4:5]
331  %s33 = call i32 asm sideeffect "; def $0", "={s33}"()
332  call amdgpu_gfx void @external_void_func_void()
333  call void asm sideeffect "; use $0", "{s33}"(i32 %s33)
334  ret void
335}
336
; Same shape as the s33 test but for s34: the value defined in s34 is used
; after the call with no copy in between. The expected assembly saves the
; incoming s34 in v40 lane 0 before the asm definition and restores it before
; returning. %out is not referenced.
337define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* %out) #0 {
338; GFX9-LABEL: test_call_void_func_void_preserves_s34:
339; GFX9:       ; %bb.0:
340; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
341; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
342; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
343; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
344; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
345; GFX9-NEXT:    v_writelane_b32 v40, s34, 0
346; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
347; GFX9-NEXT:    s_mov_b32 s33, s32
348; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
349; GFX9-NEXT:    s_getpc_b64 s[4:5]
350; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
351; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
352; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
353; GFX9-NEXT:    ;;#ASMSTART
354; GFX9-NEXT:    ; def s34
355; GFX9-NEXT:    ;;#ASMEND
356; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
357; GFX9-NEXT:    v_readlane_b32 s4, v40, 1
358; GFX9-NEXT:    ;;#ASMSTART
359; GFX9-NEXT:    ; use s34
360; GFX9-NEXT:    ;;#ASMEND
361; GFX9-NEXT:    v_readlane_b32 s5, v40, 2
362; GFX9-NEXT:    v_readlane_b32 s34, v40, 0
363; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
364; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
365; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
366; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
367; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
368; GFX9-NEXT:    s_waitcnt vmcnt(0)
369; GFX9-NEXT:    s_setpc_b64 s[4:5]
370;
371; GFX10-LABEL: test_call_void_func_void_preserves_s34:
372; GFX10:       ; %bb.0:
373; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
374; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
375; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
376; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
377; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
378; GFX10-NEXT:    s_mov_b32 exec_lo, s4
379; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
380; GFX10-NEXT:    s_mov_b32 s33, s32
381; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
382; GFX10-NEXT:    s_getpc_b64 s[4:5]
383; GFX10-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
384; GFX10-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
385; GFX10-NEXT:    v_writelane_b32 v40, s34, 0
386; GFX10-NEXT:    ;;#ASMSTART
387; GFX10-NEXT:    ; def s34
388; GFX10-NEXT:    ;;#ASMEND
389; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
390; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
391; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
392; GFX10-NEXT:    v_readlane_b32 s4, v40, 1
393; GFX10-NEXT:    ;;#ASMSTART
394; GFX10-NEXT:    ; use s34
395; GFX10-NEXT:    ;;#ASMEND
396; GFX10-NEXT:    v_readlane_b32 s5, v40, 2
397; GFX10-NEXT:    v_readlane_b32 s34, v40, 0
398; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
399; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
400; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
401; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
402; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
403; GFX10-NEXT:    s_mov_b32 exec_lo, s6
404; GFX10-NEXT:    s_waitcnt vmcnt(0)
405; GFX10-NEXT:    s_setpc_b64 s[4:5]
406  %s34 = call i32 asm sideeffect "; def $0", "={s34}"()
407  call amdgpu_gfx void @external_void_func_void()
408  call void asm sideeffect "; use $0", "{s34}"(i32 %s34)
409  ret void
410}
411
; Defines a value in v40, calls @external_void_func_void, then consumes it
; from v40 with no copy around the call. The expected assembly stores the
; incoming v40 to the stack at s33 before the definition and reloads it
; before returning; v41 holds the s30/s31/s33 lanes. %out is not referenced.
412define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* %out) #0 {
413; GFX9-LABEL: test_call_void_func_void_preserves_v40:
414; GFX9:       ; %bb.0:
415; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
417; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
418; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
419; GFX9-NEXT:    v_writelane_b32 v41, s33, 2
420; GFX9-NEXT:    v_writelane_b32 v41, s30, 0
421; GFX9-NEXT:    s_mov_b32 s33, s32
422; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
423; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
424; GFX9-NEXT:    s_getpc_b64 s[4:5]
425; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
426; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
427; GFX9-NEXT:    v_writelane_b32 v41, s31, 1
428; GFX9-NEXT:    ;;#ASMSTART
429; GFX9-NEXT:    ; def v40
430; GFX9-NEXT:    ;;#ASMEND
431; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
432; GFX9-NEXT:    ;;#ASMSTART
433; GFX9-NEXT:    ; use v40
434; GFX9-NEXT:    ;;#ASMEND
435; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
436; GFX9-NEXT:    v_readlane_b32 s4, v41, 0
437; GFX9-NEXT:    v_readlane_b32 s5, v41, 1
438; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
439; GFX9-NEXT:    v_readlane_b32 s33, v41, 2
440; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
441; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
442; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
443; GFX9-NEXT:    s_waitcnt vmcnt(0)
444; GFX9-NEXT:    s_setpc_b64 s[4:5]
445;
446; GFX10-LABEL: test_call_void_func_void_preserves_v40:
447; GFX10:       ; %bb.0:
448; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
449; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
450; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
451; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
452; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
453; GFX10-NEXT:    s_mov_b32 exec_lo, s4
454; GFX10-NEXT:    v_writelane_b32 v41, s33, 2
455; GFX10-NEXT:    s_mov_b32 s33, s32
456; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
457; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
458; GFX10-NEXT:    s_getpc_b64 s[4:5]
459; GFX10-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
460; GFX10-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
461; GFX10-NEXT:    v_writelane_b32 v41, s30, 0
462; GFX10-NEXT:    ;;#ASMSTART
463; GFX10-NEXT:    ; def v40
464; GFX10-NEXT:    ;;#ASMEND
465; GFX10-NEXT:    v_writelane_b32 v41, s31, 1
466; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
467; GFX10-NEXT:    ;;#ASMSTART
468; GFX10-NEXT:    ; use v40
469; GFX10-NEXT:    ;;#ASMEND
470; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
471; GFX10-NEXT:    v_readlane_b32 s4, v41, 0
472; GFX10-NEXT:    v_readlane_b32 s5, v41, 1
473; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
474; GFX10-NEXT:    v_readlane_b32 s33, v41, 2
475; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
476; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
477; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
478; GFX10-NEXT:    s_mov_b32 exec_lo, s6
479; GFX10-NEXT:    s_waitcnt vmcnt(0)
480; GFX10-NEXT:    s_setpc_b64 s[4:5]
481  %v40 = call i32 asm sideeffect "; def $0", "={v40}"()
482  call amdgpu_gfx void @external_void_func_void()
483  call void asm sideeffect "; use $0", "{v40}"(i32 %v40)
484  ret void
485}
486
; Callee whose inline asm lists s33 as clobbered. The expected assembly saves
; s33 in v0 lane 0 before the asm, restores it afterwards, and returns through
; s[30:31].
; NOTE(review): this definition carries no amdgpu_gfx marker, yet
; @test_call_void_func_void_clobber_s33 calls it with `call amdgpu_gfx` --
; confirm the calling-convention mismatch is intentional.
487define hidden void @void_func_void_clobber_s33() #1 {
488; GFX9-LABEL: void_func_void_clobber_s33:
489; GFX9:       ; %bb.0:
490; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
491; GFX9-NEXT:    v_writelane_b32 v0, s33, 0
492; GFX9-NEXT:    ;;#ASMSTART
493; GFX9-NEXT:    ; clobber
494; GFX9-NEXT:    ;;#ASMEND
495; GFX9-NEXT:    v_readlane_b32 s33, v0, 0
496; GFX9-NEXT:    s_setpc_b64 s[30:31]
497;
498; GFX10-LABEL: void_func_void_clobber_s33:
499; GFX10:       ; %bb.0:
500; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
501; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
502; GFX10-NEXT:    v_writelane_b32 v0, s33, 0
503; GFX10-NEXT:    ;;#ASMSTART
504; GFX10-NEXT:    ; clobber
505; GFX10-NEXT:    ;;#ASMEND
506; GFX10-NEXT:    v_readlane_b32 s33, v0, 0
507; GFX10-NEXT:    s_setpc_b64 s[30:31]
508  call void asm sideeffect "; clobber", "~{s33}"() #0
509  ret void
510}
511
; Callee whose inline asm lists s34 as clobbered. The expected assembly saves
; s34 in v0 lane 0 before the asm, restores it afterwards, and returns through
; s[30:31].
; NOTE(review): this definition carries no amdgpu_gfx marker, yet
; @test_call_void_func_void_clobber_s34 calls it with `call amdgpu_gfx` --
; confirm the calling-convention mismatch is intentional.
512define hidden void @void_func_void_clobber_s34() #1 {
513; GFX9-LABEL: void_func_void_clobber_s34:
514; GFX9:       ; %bb.0:
515; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
516; GFX9-NEXT:    v_writelane_b32 v0, s34, 0
517; GFX9-NEXT:    ;;#ASMSTART
518; GFX9-NEXT:    ; clobber
519; GFX9-NEXT:    ;;#ASMEND
520; GFX9-NEXT:    v_readlane_b32 s34, v0, 0
521; GFX9-NEXT:    s_setpc_b64 s[30:31]
522;
523; GFX10-LABEL: void_func_void_clobber_s34:
524; GFX10:       ; %bb.0:
525; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
526; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
527; GFX10-NEXT:    v_writelane_b32 v0, s34, 0
528; GFX10-NEXT:    ;;#ASMSTART
529; GFX10-NEXT:    ; clobber
530; GFX10-NEXT:    ;;#ASMEND
531; GFX10-NEXT:    v_readlane_b32 s34, v0, 0
532; GFX10-NEXT:    s_setpc_b64 s[30:31]
533  call void asm sideeffect "; clobber", "~{s34}"() #0
534  ret void
535}
536
; Calls @void_func_void_clobber_s33 using the amdgpu_gfx convention. The
; expected assembly saves only s33, s30 and s31 in v40 lanes around the call
; and returns through s[4:5].
; NOTE(review): the callee is defined without amdgpu_gfx -- confirm the
; differing conventions are intentional.
537define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 {
538; GFX9-LABEL: test_call_void_func_void_clobber_s33:
539; GFX9:       ; %bb.0:
540; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
541; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
542; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
543; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
544; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
545; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
546; GFX9-NEXT:    s_mov_b32 s33, s32
547; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
548; GFX9-NEXT:    s_getpc_b64 s[4:5]
549; GFX9-NEXT:    s_add_u32 s4, s4, void_func_void_clobber_s33@rel32@lo+4
550; GFX9-NEXT:    s_addc_u32 s5, s5, void_func_void_clobber_s33@rel32@hi+12
551; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
552; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
553; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
554; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
555; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
556; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
557; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
558; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
559; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
560; GFX9-NEXT:    s_waitcnt vmcnt(0)
561; GFX9-NEXT:    s_setpc_b64 s[4:5]
562;
563; GFX10-LABEL: test_call_void_func_void_clobber_s33:
564; GFX10:       ; %bb.0:
565; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
567; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
568; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
569; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
570; GFX10-NEXT:    s_mov_b32 exec_lo, s4
571; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
572; GFX10-NEXT:    s_mov_b32 s33, s32
573; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
574; GFX10-NEXT:    s_getpc_b64 s[4:5]
575; GFX10-NEXT:    s_add_u32 s4, s4, void_func_void_clobber_s33@rel32@lo+4
576; GFX10-NEXT:    s_addc_u32 s5, s5, void_func_void_clobber_s33@rel32@hi+12
577; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
578; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
579; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
580; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
581; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
582; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
583; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
584; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
585; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
586; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
587; GFX10-NEXT:    s_mov_b32 exec_lo, s6
588; GFX10-NEXT:    s_waitcnt vmcnt(0)
589; GFX10-NEXT:    s_setpc_b64 s[4:5]
590  call amdgpu_gfx void @void_func_void_clobber_s33()
591  ret void
592}
593
; Calls @void_func_void_clobber_s34 using the amdgpu_gfx convention. The
; caller's expected assembly has no save or restore of s34; only s33, s30 and
; s31 go through v40 lanes, and the return is through s[4:5].
; NOTE(review): the callee is defined without amdgpu_gfx -- confirm the
; differing conventions are intentional.
594define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 {
595; GFX9-LABEL: test_call_void_func_void_clobber_s34:
596; GFX9:       ; %bb.0:
597; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
599; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
600; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
601; GFX9-NEXT:    v_writelane_b32 v40, s33, 2
602; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
603; GFX9-NEXT:    s_mov_b32 s33, s32
604; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
605; GFX9-NEXT:    s_getpc_b64 s[4:5]
606; GFX9-NEXT:    s_add_u32 s4, s4, void_func_void_clobber_s34@rel32@lo+4
607; GFX9-NEXT:    s_addc_u32 s5, s5, void_func_void_clobber_s34@rel32@hi+12
608; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
609; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
610; GFX9-NEXT:    v_readlane_b32 s4, v40, 0
611; GFX9-NEXT:    v_readlane_b32 s5, v40, 1
612; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
613; GFX9-NEXT:    v_readlane_b32 s33, v40, 2
614; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
615; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
616; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
617; GFX9-NEXT:    s_waitcnt vmcnt(0)
618; GFX9-NEXT:    s_setpc_b64 s[4:5]
619;
620; GFX10-LABEL: test_call_void_func_void_clobber_s34:
621; GFX10:       ; %bb.0:
622; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
623; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
624; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
625; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
626; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
627; GFX10-NEXT:    s_mov_b32 exec_lo, s4
628; GFX10-NEXT:    v_writelane_b32 v40, s33, 2
629; GFX10-NEXT:    s_mov_b32 s33, s32
630; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
631; GFX10-NEXT:    s_getpc_b64 s[4:5]
632; GFX10-NEXT:    s_add_u32 s4, s4, void_func_void_clobber_s34@rel32@lo+4
633; GFX10-NEXT:    s_addc_u32 s5, s5, void_func_void_clobber_s34@rel32@hi+12
634; GFX10-NEXT:    v_writelane_b32 v40, s30, 0
635; GFX10-NEXT:    v_writelane_b32 v40, s31, 1
636; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
637; GFX10-NEXT:    v_readlane_b32 s4, v40, 0
638; GFX10-NEXT:    v_readlane_b32 s5, v40, 1
639; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
640; GFX10-NEXT:    v_readlane_b32 s33, v40, 2
641; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
642; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
643; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
644; GFX10-NEXT:    s_mov_b32 exec_lo, s6
645; GFX10-NEXT:    s_waitcnt vmcnt(0)
646; GFX10-NEXT:    s_setpc_b64 s[4:5]
647  call amdgpu_gfx void @void_func_void_clobber_s34()
648  ret void
649}
650
; Defines a value with the fixed-register output constraint ={s40}, calls
; @external_void_func_void, then consumes it through the generic "s" input
; constraint. The expected assembly shows "; use s40", saves the incoming s40
; in v40 lane 0 and restores it before returning.
; The asm template spells "s40" literally instead of using $0 as the other
; tests in this file do. Despite the "_kernel" suffix, this is defined as an
; amdgpu_gfx function.
651define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 {
652; GFX9-LABEL: callee_saved_sgpr_kernel:
653; GFX9:       ; %bb.0:
654; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
655; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
656; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
657; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
658; GFX9-NEXT:    v_writelane_b32 v40, s33, 3
659; GFX9-NEXT:    v_writelane_b32 v40, s40, 0
660; GFX9-NEXT:    v_writelane_b32 v40, s30, 1
661; GFX9-NEXT:    s_mov_b32 s33, s32
662; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
663; GFX9-NEXT:    s_getpc_b64 s[4:5]
664; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
665; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
666; GFX9-NEXT:    v_writelane_b32 v40, s31, 2
667; GFX9-NEXT:    ;;#ASMSTART
668; GFX9-NEXT:    ; def s40
669; GFX9-NEXT:    ;;#ASMEND
670; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
671; GFX9-NEXT:    v_readlane_b32 s4, v40, 1
672; GFX9-NEXT:    ;;#ASMSTART
673; GFX9-NEXT:    ; use s40
674; GFX9-NEXT:    ;;#ASMEND
675; GFX9-NEXT:    v_readlane_b32 s5, v40, 2
676; GFX9-NEXT:    v_readlane_b32 s40, v40, 0
677; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
678; GFX9-NEXT:    v_readlane_b32 s33, v40, 3
679; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
680; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
681; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
682; GFX9-NEXT:    s_waitcnt vmcnt(0)
683; GFX9-NEXT:    s_setpc_b64 s[4:5]
684;
685; GFX10-LABEL: callee_saved_sgpr_kernel:
686; GFX10:       ; %bb.0:
687; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
688; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
689; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
690; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
691; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
692; GFX10-NEXT:    s_mov_b32 exec_lo, s4
693; GFX10-NEXT:    v_writelane_b32 v40, s33, 3
694; GFX10-NEXT:    s_mov_b32 s33, s32
695; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
696; GFX10-NEXT:    s_getpc_b64 s[4:5]
697; GFX10-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
698; GFX10-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
699; GFX10-NEXT:    v_writelane_b32 v40, s40, 0
700; GFX10-NEXT:    ;;#ASMSTART
701; GFX10-NEXT:    ; def s40
702; GFX10-NEXT:    ;;#ASMEND
703; GFX10-NEXT:    v_writelane_b32 v40, s30, 1
704; GFX10-NEXT:    v_writelane_b32 v40, s31, 2
705; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
706; GFX10-NEXT:    v_readlane_b32 s4, v40, 1
707; GFX10-NEXT:    ;;#ASMSTART
708; GFX10-NEXT:    ; use s40
709; GFX10-NEXT:    ;;#ASMEND
710; GFX10-NEXT:    v_readlane_b32 s5, v40, 2
711; GFX10-NEXT:    v_readlane_b32 s40, v40, 0
712; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
713; GFX10-NEXT:    v_readlane_b32 s33, v40, 3
714; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
715; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
716; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
717; GFX10-NEXT:    s_mov_b32 exec_lo, s6
718; GFX10-NEXT:    s_waitcnt vmcnt(0)
719; GFX10-NEXT:    s_setpc_b64 s[4:5]
720  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
721  call amdgpu_gfx void @external_void_func_void()
722  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
723  ret void
724}
725
; Checks register preservation around a call made from an amdgpu_gfx function.
; The IR body pins one value to s40 and another to v32 with inline asm, calls
; external_void_func_void, and then consumes both values with inline asm, so
; both values have to stay live across the call.
;
; What the autogenerated assertions below record for both run lines
;   - v41 is stored to the stack with all lanes enabled (exec forced to -1)
;     and then serves as the holder for SGPR values via v_writelane.
;   - s40 is written to lane 0 of v41 before the "def s40" asm, is used
;     directly after the call, and is read back from lane 0 before returning.
;   - the v32 value is copied into v40 ahead of the call and "use v40" is what
;     appears afterwards; v40 itself is stored to the stack at s33 before the
;     inline asm and reloaded after the last use.
;   - the incoming s30/s31 go to lanes 1 and 2 of v41 and come back in s4/s5,
;     which feed the final s_setpc_b64.
; NOTE(review): presumably s40 and v40 are preserved across calls under this
; calling convention while v32 is not - confirm against the AMDGPU ABI docs.
; The assertion lines are regenerated by utils/update_llc_test_checks.py and
; should not be edited by hand.
726define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
727; GFX9-LABEL: callee_saved_sgpr_vgpr_kernel:
728; GFX9:       ; %bb.0:
729; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
730; GFX9-NEXT:    s_or_saveexec_b64 s[4:5], -1
731; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
732; GFX9-NEXT:    s_mov_b64 exec, s[4:5]
733; GFX9-NEXT:    v_writelane_b32 v41, s33, 3
734; GFX9-NEXT:    v_writelane_b32 v41, s40, 0
735; GFX9-NEXT:    v_writelane_b32 v41, s30, 1
736; GFX9-NEXT:    s_mov_b32 s33, s32
737; GFX9-NEXT:    s_add_u32 s32, s32, 0x400
738; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
739; GFX9-NEXT:    ;;#ASMSTART
740; GFX9-NEXT:    ; def s40
741; GFX9-NEXT:    ;;#ASMEND
742; GFX9-NEXT:    ;;#ASMSTART
743; GFX9-NEXT:    ; def v32
744; GFX9-NEXT:    ;;#ASMEND
745; GFX9-NEXT:    s_getpc_b64 s[4:5]
746; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
747; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
748; GFX9-NEXT:    v_writelane_b32 v41, s31, 2
749; GFX9-NEXT:    v_mov_b32_e32 v40, v32
750; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
751; GFX9-NEXT:    ;;#ASMSTART
752; GFX9-NEXT:    ; use s40
753; GFX9-NEXT:    ;;#ASMEND
754; GFX9-NEXT:    ;;#ASMSTART
755; GFX9-NEXT:    ; use v40
756; GFX9-NEXT:    ;;#ASMEND
757; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
758; GFX9-NEXT:    v_readlane_b32 s4, v41, 1
759; GFX9-NEXT:    v_readlane_b32 s5, v41, 2
760; GFX9-NEXT:    v_readlane_b32 s40, v41, 0
761; GFX9-NEXT:    s_sub_u32 s32, s32, 0x400
762; GFX9-NEXT:    v_readlane_b32 s33, v41, 3
763; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
764; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
765; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
766; GFX9-NEXT:    s_waitcnt vmcnt(0)
767; GFX9-NEXT:    s_setpc_b64 s[4:5]
768;
769; GFX10-LABEL: callee_saved_sgpr_vgpr_kernel:
770; GFX10:       ; %bb.0:
771; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
772; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
773; GFX10-NEXT:    s_or_saveexec_b32 s4, -1
774; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
775; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
776; GFX10-NEXT:    s_mov_b32 exec_lo, s4
777; GFX10-NEXT:    v_writelane_b32 v41, s33, 3
778; GFX10-NEXT:    s_mov_b32 s33, s32
779; GFX10-NEXT:    s_add_u32 s32, s32, 0x200
780; GFX10-NEXT:    s_getpc_b64 s[4:5]
781; GFX10-NEXT:    s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
782; GFX10-NEXT:    s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
783; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
784; GFX10-NEXT:    v_writelane_b32 v41, s40, 0
785; GFX10-NEXT:    ;;#ASMSTART
786; GFX10-NEXT:    ; def s40
787; GFX10-NEXT:    ;;#ASMEND
788; GFX10-NEXT:    ;;#ASMSTART
789; GFX10-NEXT:    ; def v32
790; GFX10-NEXT:    ;;#ASMEND
791; GFX10-NEXT:    v_mov_b32_e32 v40, v32
792; GFX10-NEXT:    v_writelane_b32 v41, s30, 1
793; GFX10-NEXT:    v_writelane_b32 v41, s31, 2
794; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
795; GFX10-NEXT:    ;;#ASMSTART
796; GFX10-NEXT:    ; use s40
797; GFX10-NEXT:    ;;#ASMEND
798; GFX10-NEXT:    ;;#ASMSTART
799; GFX10-NEXT:    ; use v40
800; GFX10-NEXT:    ;;#ASMEND
801; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
802; GFX10-NEXT:    v_readlane_b32 s4, v41, 1
803; GFX10-NEXT:    v_readlane_b32 s5, v41, 2
804; GFX10-NEXT:    v_readlane_b32 s40, v41, 0
805; GFX10-NEXT:    s_sub_u32 s32, s32, 0x200
806; GFX10-NEXT:    v_readlane_b32 s33, v41, 3
807; GFX10-NEXT:    s_or_saveexec_b32 s6, -1
808; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
809; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
810; GFX10-NEXT:    s_mov_b32 exec_lo, s6
811; GFX10-NEXT:    s_waitcnt vmcnt(0)
812; GFX10-NEXT:    s_setpc_b64 s[4:5]
  ; Produce one value constrained to s40 and one constrained to v32.
813  %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
814  %v32 = call i32 asm sideeffect "; def v32", "={v32}"() #0
815  call amdgpu_gfx void @external_void_func_void()
  ; Consume both values after the call so they are live across it; the "s" and
  ; "v" constraints leave the choice of register to the allocator.
816  call void asm sideeffect "; use $0", "s"(i32 %s40) #0
817  call void asm sideeffect "; use $0", "v"(i32 %v32) #0
818  ret void
819}
820
; Attribute groups referenced by the definitions in this file. #0 appears on
; the external_void_func_void declaration and on the inline asm call sites;
; #1 (which adds noinline) is the group on callee_saved_sgpr_vgpr_kernel.
821attributes #0 = { nounwind }
822attributes #1 = { nounwind noinline }
823