# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s -check-prefixes=CHECK,SI
# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=gfx900 -o - %s | FileCheck %s -check-prefixes=CHECK,GFX9
# RUN: llc -run-pass=si-insert-waitcnts -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -o - %s | FileCheck %s -check-prefixes=CHECK,GFX10
---
# On the SI run the directives below expect an S_WAITCNT 127 followed by a
# `$vcc = S_MOV_B64 $vcc` copy between the V_CMP that defines $vcc and the
# S_CBRANCH_VCCZ that reads it. On the other runs the branch must directly
# follow the compare.
# NOTE(review): the input branches to %bb.1 while the expected output names
# %bb.2 -- presumably blocks are renumbered in layout order when the MIR is
# printed (bb.1 is the third block in the body). Confirm before "fixing"
# either side.

# CHECK-LABEL: name: vccz_corrupt_workaround
# CHECK: $vcc = V_CMP_EQ_F32
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: $vcc = S_MOV_B64 $vcc
# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit killed $vcc

name: vccz_corrupt_workaround
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1

    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0
    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    $vcc = V_CMP_EQ_F32_e64 0, 0, 0, undef $sgpr2, 0, implicit $mode, implicit $exec
    S_CBRANCH_VCCZ %bb.1, implicit killed $vcc

  bb.2:
    liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    $vgpr0 = V_MOV_B32_e32 9, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.3

  bb.1:
    liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    $vgpr0 = V_MOV_B32_e32 100, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 1, implicit $exec

  bb.3:
    liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
---
# Same block layout as a compare-and-branch test, but here the branch reads
# `undef $vcc` and nothing in the function defines vcc. The directives only
# pin what follows the first BUFFER_STORE_DWORD_OFFSET in the output: on the
# SI run an S_WAITCNT 3855 precedes the next V_MOV_B32, on the other runs the
# V_MOV_B32 follows the store directly.

# CHECK-LABEL: name: vccz_corrupt_undef_vcc
# CHECK: BUFFER_STORE_DWORD_OFFSET
# SI-NEXT: S_WAITCNT 3855
# CHECK-NEXT: $vgpr0 = V_MOV_B32_e32

name: vccz_corrupt_undef_vcc
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1

    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed $sgpr0_sgpr1, 11, 0, 0
    $sgpr7 = S_MOV_B32 61440
    $sgpr6 = S_MOV_B32 -1
    S_CBRANCH_VCCZ %bb.1, implicit undef $vcc

  bb.2:
    liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    $vgpr0 = V_MOV_B32_e32 9, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.3

  bb.1:
    liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    $vgpr0 = V_MOV_B32_e32 100, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = V_MOV_B32_e32 1, implicit $exec

  bb.3:
    liveins: $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003

    $sgpr3 = S_MOV_B32 61440
    $sgpr2 = S_MOV_B32 -1
    BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
---
# Test that after reloading vcc spilled to a vgpr, we insert any necessary
# instructions to fix vccz.
# The `$vcc = S_MOV_B64 $vcc` copy is expected on the SI and GFX9 runs. The
# GFX10 run has no directive for it, so there the branch must directly follow
# the second V_READLANE_B32.

# CHECK-LABEL: name: reload_vcc_from_vgpr
# CHECK: $vcc_lo = V_READLANE_B32 $vgpr0, 8, implicit-def $vcc
# CHECK: $vcc_hi = V_READLANE_B32 $vgpr0, 9
# SI:    $vcc = S_MOV_B64 $vcc
# GFX9:  $vcc = S_MOV_B64 $vcc
# CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc

name: reload_vcc_from_vgpr
body: |
  bb.0:
    $vcc_lo = V_READLANE_B32 $vgpr0, 8, implicit-def $vcc
    $vcc_hi = V_READLANE_B32 $vgpr0, 9
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
  bb.1:

...
---
# Test that after reloading vcc spilled to memory, we insert any necessary
# instructions to fix vccz.
# Each half of vcc is loaded into $vgpr0 with BUFFER_LOAD_DWORD_OFFSET and
# moved into place with V_READFIRSTLANE_B32. The `$vcc = S_MOV_B64 $vcc` copy
# is expected on the SI and GFX9 runs; on the GFX10 run the branch must
# directly follow the last V_READFIRSTLANE_B32.

# CHECK-LABEL: name: reload_vcc_from_mem
# CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, 0, implicit $exec
# CHECK: $vcc_lo = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
# CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, 0, implicit $exec
# CHECK: $vcc_hi = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
# SI:    $vcc = S_MOV_B64 $vcc
# GFX9:  $vcc = S_MOV_B64 $vcc
# CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc

name: reload_vcc_from_mem
body: |
  bb.0:
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, 0, implicit $exec
    $vcc_lo = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
    $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, 0, implicit $exec
    $vcc_hi = V_READFIRSTLANE_B32 killed $vgpr0, implicit $exec, implicit-def $vcc
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
  bb.1:

...
---
# Test that after inline asm that defines vcc_lo, we insert any necessary
# instructions to fix vccz.
# The `$vcc = S_MOV_B64 $vcc` copy is expected on the SI and GFX9 runs; on the
# GFX10 run the branch must directly follow the INLINEASM.

# CHECK-LABEL: name: inlineasm_def_vcc_lo
# CHECK: INLINEASM &"; def vcc_lo", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vcc_lo
# SI:    $vcc = S_MOV_B64 $vcc
# GFX9:  $vcc = S_MOV_B64 $vcc
# CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc

name: inlineasm_def_vcc_lo
body: |
  bb.0:
    INLINEASM &"; def vcc_lo", 1, 10, implicit-def $vcc_lo
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
  bb.1:

...
---
# Test that after inline asm that defines vcc, no unnecessary instructions are
# inserted to fix vccz.
# On every run the branch must be the very next instruction after the
# INLINEASM, i.e. no `$vcc = S_MOV_B64 $vcc` copy may appear between them.

# CHECK-LABEL: name: inlineasm_def_vcc
# CHECK: INLINEASM &"; def vcc", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vcc
# CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc

name: inlineasm_def_vcc
body: |
  bb.0:
    INLINEASM &"; def vcc", 1, 10, implicit-def $vcc
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
  bb.1:

...
---
# Test vcc definition in a previous basic block.
# bb.0 writes $vcc and bb.1 only branches on it. The SI and GFX9 runs expect a
# `$vcc = S_MOV_B64 $vcc` copy inside bb.1 ahead of the branch; the GFX10 run
# only requires the branch to appear somewhere after the bb.1 label.

# CHECK-LABEL: name: vcc_def_pred
# CHECK: bb.1:
# SI:  $vcc = S_MOV_B64 $vcc
# GFX9:  $vcc = S_MOV_B64 $vcc
# CHECK: S_CBRANCH_VCCZ %bb.2, implicit $vcc

name: vcc_def_pred
body: |
  bb.0:
    $vcc = S_MOV_B64 0
  bb.1:
    S_CBRANCH_VCCZ %bb.2, implicit $vcc
  bb.2:

...

# Test various ways that the live range of vccz can overlap with the live range
# of an outstanding smem load.

---
# Order in the input: smem load, S_WAITCNT 127, vcc def, branch on vccz.
# Only the SI run has directives beyond the label. They expect the input
# sequence unchanged apart from a leading S_WAITCNT 0, with no extra copy of
# vcc before the branch.
# CHECK-LABEL: name: load_wait_def_use
# SI: S_WAITCNT 0
# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
name: load_wait_def_use
body: |
  bb.0:
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
    S_WAITCNT 127
    $vcc = S_MOV_B64 0
    S_CBRANCH_VCCZ %bb.1, implicit $vcc
  bb.1:
...

---
# Order in the input: smem load, S_WAITCNT 127, S_NOP, vcc def, branch on
# vccz. Only the SI run has directives beyond the label. They expect the input
# sequence unchanged apart from a leading S_WAITCNT 0, with no extra copy of
# vcc before the branch.
# CHECK-LABEL: name: load_wait_nop_def_use
# SI: S_WAITCNT 0
# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: S_NOP 0
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
name: load_wait_nop_def_use
body: |
  bb.0:
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
    S_WAITCNT 127
    S_NOP 0
    $vcc = S_MOV_B64 0
    S_CBRANCH_VCCZ %bb.1, implicit $vcc
  bb.1:
...

---
# Order in the input: smem load, vcc def, S_WAITCNT 127, branch on vccz.
# Only the SI run has directives beyond the label. They expect a leading
# S_WAITCNT 0 and a `$vcc = S_MOV_B64 $vcc` copy inserted between the existing
# wait and the branch.
# CHECK-LABEL: name: load_def_wait_use
# SI: S_WAITCNT 0
# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: $vcc = S_MOV_B64 $vcc
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
name: load_def_wait_use
body: |
  bb.0:
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
    $vcc = S_MOV_B64 0
    S_WAITCNT 127
    S_CBRANCH_VCCZ %bb.1, implicit $vcc
  bb.1:
...

---
# Order in the input: smem load, vcc def, S_WAITCNT 127, S_NOP, branch on
# vccz. Only the SI run has directives beyond the label. They expect a leading
# S_WAITCNT 0 and a `$vcc = S_MOV_B64 $vcc` copy inserted between the S_NOP
# and the branch.
# CHECK-LABEL: name: load_def_wait_nop_use
# SI: S_WAITCNT 0
# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: S_NOP 0
# SI-NEXT: $vcc = S_MOV_B64 $vcc
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
name: load_def_wait_nop_use
body: |
  bb.0:
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
    $vcc = S_MOV_B64 0
    S_WAITCNT 127
    S_NOP 0
    S_CBRANCH_VCCZ %bb.1, implicit $vcc
  bb.1:
...

---
# Order in the input: smem load, vcc def, branch on vccz, with no wait in
# between. Only the SI run has directives beyond the label. They expect a
# leading S_WAITCNT 0, plus an S_WAITCNT 127 and a `$vcc = S_MOV_B64 $vcc`
# copy inserted between the vcc def and the branch.
# CHECK-LABEL: name: load_def_use
# SI: S_WAITCNT 0
# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: $vcc = S_MOV_B64 $vcc
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
name: load_def_use
body: |
  bb.0:
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
    $vcc = S_MOV_B64 0
    S_CBRANCH_VCCZ %bb.1, implicit $vcc
  bb.1:
...

---
# Order in the input: vcc def, smem load, S_WAITCNT 127, branch on vccz.
# Only the SI run has directives beyond the label. They expect a leading
# S_WAITCNT 0 and a `$vcc = S_MOV_B64 $vcc` copy inserted between the existing
# wait and the branch.
# CHECK-LABEL: name: def_load_wait_use
# SI: S_WAITCNT 0
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: $vcc = S_MOV_B64 $vcc
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
name: def_load_wait_use
body: |
  bb.0:
    $vcc = S_MOV_B64 0
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
    S_WAITCNT 127
    S_CBRANCH_VCCZ %bb.1, implicit $vcc
  bb.1:
...

---
# Order in the input: vcc def, smem load, S_WAITCNT 127, S_NOP, branch on
# vccz. Only the SI run has directives beyond the label. They expect a leading
# S_WAITCNT 0 and a `$vcc = S_MOV_B64 $vcc` copy inserted between the S_NOP
# and the branch.
# CHECK-LABEL: name: def_load_wait_nop_use
# SI: S_WAITCNT 0
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: S_NOP 0
# SI-NEXT: $vcc = S_MOV_B64 $vcc
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
name: def_load_wait_nop_use
body: |
  bb.0:
    $vcc = S_MOV_B64 0
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
    S_WAITCNT 127
    S_NOP 0
    S_CBRANCH_VCCZ %bb.1, implicit $vcc
  bb.1:
...

---
# Order in the input: vcc def, smem load, branch on vccz, with no wait in
# between. Only the SI run has directives beyond the label. They expect a
# leading S_WAITCNT 0, plus an S_WAITCNT 127 and a `$vcc = S_MOV_B64 $vcc`
# copy inserted between the load and the branch.
# CHECK-LABEL: name: def_load_use
# SI: S_WAITCNT 0
# SI-NEXT: $vcc = S_MOV_B64 0
# SI-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
# SI-NEXT: S_WAITCNT 127
# SI-NEXT: $vcc = S_MOV_B64 $vcc
# SI-NEXT: S_CBRANCH_VCCZ %bb.1, implicit $vcc
name: def_load_use
body: |
  bb.0:
    $vcc = S_MOV_B64 0
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
    S_CBRANCH_VCCZ %bb.1, implicit $vcc
  bb.1:
...
