; RUN: llc -O0 -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VGPR %s
; RUN: llc -O0 -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-vgpr=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VMEM %s
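
; These tests check spilling of wide SGPR tuples (2 to 32 dwords) at -O0 on
; fiji. The first RUN line uses the default SGPR-to-VGPR spilling and is
; checked with the VGPR prefix: one v_writelane_b32/v_readlane_b32 pair per
; 32-bit component, using lanes 0..N-1. The second RUN line passes
; -amdgpu-spill-sgpr-to-vgpr=0 so the spills go to scratch memory instead,
; checked with the VMEM prefix (buffer_store_dword/buffer_load_dword).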

; GCN-LABEL: {{^}}spill_sgpr_x2:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1


; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
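; Each test below follows the same pattern: an inline-asm def produces a wide
; SGPR tuple that is used only in bb0, so the value is live across the
; conditional branch and must be spilled and restored around it at -O0.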
define amdgpu_kernel void @spill_sgpr_x2(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}spill_sgpr_x3:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2


; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x3(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <3 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<3 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}spill_sgpr_x4:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3


; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x4(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<4 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}spill_sgpr_x5:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4


; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x5(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <5 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<5 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}spill_sgpr_x8:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x8(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}spill_sgpr_x16:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 8
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 9
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 10
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 11
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 12
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 13
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 14
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 15
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 8
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 9
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 10
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 11
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 12
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 13
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 14
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 15

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x16(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}spill_sgpr_x32:

; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 8
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 9
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 10
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 11
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 12
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 13
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 14
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 15
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 16
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 17
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 18
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 19
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 20
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 21
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 22
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 23
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 24
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 25
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 26
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 27
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 28
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 29
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 30
; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 31
; VGPR: s_cbranch_scc1

; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 8
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 9
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 10
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 11
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 12
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 13
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 14
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 15
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 16
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 17
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 18
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 19
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 20
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 21
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 22
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 23
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 24
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 25
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 26
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 27
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 28
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 29
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 30
; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 31

; VMEM: buffer_store_dword
; VMEM: s_cbranch_scc1

; VMEM: buffer_load_dword
define amdgpu_kernel void @spill_sgpr_x32(i32 addrspace(1)* %out, i32 %in) #0 {
  %wide.sgpr = call <32 x i32> asm sideeffect "; def $0", "=s" () #0
  %cmp = icmp eq i32 %in, 0
  br i1 %cmp, label %bb0, label %ret

bb0:
  call void asm sideeffect "; use $0", "s"(<32 x i32> %wide.sgpr) #0
  br label %ret

ret:
  ret void
}

attributes #0 = { nounwind }