; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.break
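; The unconditional loop exit from %bb1 should be annotated with the
; mask-only @llvm.amdgcn.break form, while the conditional exit from
; %bb4 inverts %cmp1 and feeds it to @llvm.amdgcn.if.break.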

; OPT-LABEL: @break_loop(
; OPT: bb1:
; OPT: call i64 @llvm.amdgcn.break(i64
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT: load volatile
; OPT: xor i1 %cmp1
; OPT: call i64 @llvm.amdgcn.if.break(
; OPT: br label %Flow

; OPT: Flow:
; OPT: call i1 @llvm.amdgcn.loop(i64
; OPT: br i1 %{{[0-9]+}}, label %bb9, label %bb1

; OPT: bb9:
; OPT: call void @llvm.amdgcn.end.cf(i64

; TODO: Can remove exec fixes in return block
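; In the GCN output, the mask of broken (exited) lanes accumulates
; across iterations with s_or_b64; the loop re-enters via
; s_andn2_b64 exec / s_cbranch_execnz while any lanes remain active.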
; GCN-LABEL: {{^}}break_loop:
; GCN: s_mov_b64 [[INITMASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}

; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1
; GCN: s_or_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INITMASK]]
; GCN: v_cmp_lt_i32_e32 vcc, -1
; GCN: s_and_b64 vcc, exec, vcc
; GCN-NEXT: s_cbranch_vccnz [[FLOW:BB[0-9]+_[0-9]+]]

; GCN: ; %bb.2: ; %bb4
; GCN: buffer_load_dword
; GCN: v_cmp_ge_i32_e32 vcc,
; GCN: s_or_b64 [[MASK]], vcc, [[INITMASK]]

; GCN: [[FLOW]]:
; GCN: s_mov_b64 [[INITMASK]], [[MASK]]
; GCN: s_andn2_b64 exec, exec, [[MASK]]
; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]]

; GCN: ; %bb.4: ; %bb9
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

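; The break condition coming from %bb1 is undef in the %Flow phi; it
; should still be threaded through @llvm.amdgcn.if.break as an undef
; i1 rather than being dropped.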
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.if.break(i1 undef, i64 %phi.broken)
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9:                                              ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef

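; Here the break condition from %bb1 is a ConstantExpr icmp of @lds
; against an inttoptr value; it should be passed directly to
; @llvm.amdgcn.if.break.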
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.if.break(i1 icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), i64 %phi.broken)
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9:                                              ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

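; With a constant-true break condition from %bb1, the annotator is
; expected to use the unconditional @llvm.amdgcn.break form instead of
; @llvm.amdgcn.if.break.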
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.break(i64 %phi.broken)
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9:                                              ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

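; With a constant-false break condition from %bb1, no break intrinsic
; is needed on that path; %phi.broken is forwarded unchanged into the
; %loop.phi in %Flow.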
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NOT: call
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %0, %bb4 ], [ %phi.broken, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9:                                              ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap the order of the branches in the flow block so that a true phi
; value means continue rather than break.

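; Since the true edge now continues the loop, the break condition must
; be inverted (xor i1 %tmp3, true) before it is passed to
; @llvm.amdgcn.if.break in %Flow.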
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %1, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = xor i1 %tmp3, true
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %0, i64 %phi.broken)
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %1)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %1)
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }