; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s

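; Test insertion of "skip if dead" early exits: llvm.amdgcn.kill is lowered
; to exec mask updates, and when the whole wave may have been killed, a
; branch to an early-exit block (a null export marked done, then s_endpgm)
; is emitted.

; A kill with a constant-true condition is a no-op: no exec update and no
; early-exit block are expected.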
; CHECK-LABEL: {{^}}test_kill_depth_0_imm_pos:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
  call void @llvm.amdgcn.kill(i1 true)
  ret void
}

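; A kill with a constant-false condition clears exec outright; the skip
; branch targets the early-exit block, which exports null and ends the
; program.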
; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: s_cbranch_execz BB1_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB1_2:
; CHECK-NEXT: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
  call void @llvm.amdgcn.kill(i1 false)
  ret void
}

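; Two consecutive constant-false kills currently each emit an exec clear
; and a skip branch to the shared early-exit block; see the FIXME below.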
; FIXME: Ideally only one would be emitted
; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg_x2:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: s_cbranch_execz BB2_3
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: s_cbranch_execz BB2_3
; CHECK-NEXT: ; %bb.2:
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB2_3:
; CHECK:      exp null
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
  call void @llvm.amdgcn.kill(i1 false)
  call void @llvm.amdgcn.kill(i1 false)
  ret void
}

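; A kill on a variable condition lowers to v_cmpx, which writes the compare
; result directly to exec, followed by a skip branch taken when no active
; lanes remain.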
; CHECK-LABEL: {{^}}test_kill_depth_var:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: v_cmpx_gt_f32_e32 vcc, 0, v0
; CHECK-NEXT: s_cbranch_execz BB3_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB3_2:
; CHECK:      exp null
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
  %cmp = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp)
  ret void
}

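; Killing twice on the same condition still emits two v_cmpx/skip pairs;
; see the FIXME below.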
; FIXME: Ideally only one would be emitted
; CHECK-LABEL: {{^}}test_kill_depth_var_x2_same:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: v_cmpx_gt_f32_e32 vcc, 0, v0
; CHECK-NEXT: s_cbranch_execz BB4_3
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: v_cmpx_gt_f32_e32 vcc, 0, v0
; CHECK-NEXT: s_cbranch_execz BB4_3
; CHECK-NEXT: ; %bb.2:
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB4_3:
; CHECK:      exp null
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
  %cmp = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp)
  call void @llvm.amdgcn.kill(i1 %cmp)
  ret void
}

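; Kills on two different values each emit their own v_cmpx, both skipping
; to the same early-exit block; see the FIXME below.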
; FIXME: Ideally only one early-exit would be emitted
; CHECK-LABEL: {{^}}test_kill_depth_var_x2:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: v_cmpx_gt_f32_e32 vcc, 0, v0
; CHECK-NEXT: s_cbranch_execz BB5_3
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: v_cmpx_gt_f32_e32 vcc, 0, v1
; CHECK-NEXT: s_cbranch_execz BB5_3
; CHECK-NEXT: ; %bb.2:
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB5_3:
; CHECK:      exp null
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
  %cmp.x = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.x)
  %cmp.y = fcmp olt float %y, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.y)
  ret void
}

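; As above, but with an instruction between the kills: the inline asm
; defines the value (v7) that feeds the second kill condition.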
; CHECK-LABEL: {{^}}test_kill_depth_var_x2_instructions:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: v_cmpx_gt_f32_e32 vcc, 0, v0
; CHECK-NEXT: s_cbranch_execz BB6_3
; CHECK-NEXT: ; %bb.1:
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_cmpx_gt_f32_e32 vcc, 0, v7
; CHECK-NEXT: s_cbranch_execz BB6_3
; CHECK-NEXT: ; %bb.2:
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB6_3:
; CHECK-NEXT: exp null
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
  %cmp.x = fcmp olt float %x, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.x)
  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
  %cmp.y = fcmp olt float %y, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.y)
  ret void
}

; FIXME: why does the skip depend on the asm length in the same block?

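; A kill behind a uniform branch: the kill still lowers to v_cmpx, but no
; early-exit block is expected afterwards (see the TODO in the checks).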
; CHECK-LABEL: {{^}}test_kill_control_flow:
; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0
; CHECK: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]

; CHECK-NEXT: ; %bb.1:
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64

; CHECK: v_cmpx_gt_f32_e32 vcc, 0, v7

; TODO: We could do an early-exit here (the branch above is uniform!)
; CHECK-NOT: exp null

; CHECK: v_mov_b32_e32 v0, 1.0
define amdgpu_ps float @test_kill_control_flow(i32 inreg %arg) #0 {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  %cmp.var = fcmp olt float %var, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.var)
  br label %exit

exit:
  ret float 1.0
}

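; Like the previous test, but with code after the kill: values defined
; before the kill (v8) and after it (v9) must remain live into the stores.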
; CHECK-LABEL: {{^}}test_kill_control_flow_remainder:
; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0
; CHECK-NEXT: v_mov_b32_e32 v{{[0-9]+}}, 0
; CHECK-NEXT: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]

; CHECK-NEXT: ; %bb.1: ; %bb
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: ;;#ASMEND
; CHECK: v_mov_b32_e64 v8, -1
; CHECK: ;;#ASMEND
; CHECK: v_cmpx_gt_f32_e32 vcc, 0, v7

; TODO: We could do an early-exit here (the branch above is uniform!)
; CHECK-NOT: exp null

; CHECK: buffer_store_dword v8
; CHECK: v_mov_b32_e64 v9, -2

; CHECK: {{^}}BB{{[0-9]+_[0-9]+}}:
; CHECK: buffer_store_dword v9
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"()
  %cmp.var = fcmp olt float %var, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.var)
  store volatile float %live.across, float addrspace(1)* undef
  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"()
  br label %exit

exit:
  %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
  store float %phi, float addrspace(1)* undef
  ret void
}

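; A kill ahead of uniform control flow in a shader that returns a value:
; exec is ANDed with the kill condition up front, and the early exit
; exports null if no lanes survive.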
; CHECK-LABEL: {{^}}test_kill_control_flow_return:

; CHECK: v_cmp_eq_u32_e64 [[KILL_CC:s\[[0-9]+:[0-9]+\]]], s0, 1
; CHECK: s_and_b64 exec, exec, s[2:3]
; CHECK-NEXT: s_cbranch_execz [[EXIT_BB:BB[0-9]+_[0-9]+]]

; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0
; CHECK: s_cbranch_scc0 [[COND_BB:BB[0-9]+_[0-9]+]]
; CHECK: s_branch [[RETURN_BB:BB[0-9]+_[0-9]+]]

; CHECK: [[COND_BB]]:
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_mov_b32_e32 v0, v7

; CHECK: [[EXIT_BB]]:
; CHECK-NEXT: exp null
; CHECK-NEXT: s_endpgm

; CHECK: [[RETURN_BB]]:
define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
entry:
  %kill = icmp eq i32 %arg, 1
  %cmp = icmp eq i32 %arg, 0
  call void @llvm.amdgcn.kill(i1 %kill)
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  br label %exit

exit:
  %ret = phi float [ %var, %bb ], [ 0.0, %entry ]
  ret float %ret
}

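; A kill inside a divergent loop: the v_cmpx is emitted inside the loop
; body, and the saved exec mask is restored at the loop exit.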
; CHECK-LABEL: {{^}}test_kill_divergent_loop:
; CHECK: v_cmp_eq_u32_e32 vcc, 0, v0
; CHECK-NEXT: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], vcc
; CHECK-NEXT: s_xor_b64 [[SAVEEXEC]], exec, [[SAVEEXEC]]
; CHECK-NEXT: s_cbranch_execz [[EXIT:BB[0-9]+_[0-9]+]]

; CHECK: ; %bb.{{[0-9]+}}: ; %bb.preheader
; CHECK: s_mov_b32

; CHECK: [[LOOP_BB:BB[0-9]+_[0-9]+]]:

; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_nop_e64
; CHECK: v_cmpx_gt_f32_e32 vcc, 0, v7

; CHECK-NEXT: ; %bb.3:
; CHECK: buffer_load_dword [[LOAD:v[0-9]+]]
; CHECK: v_cmp_eq_u32_e32 vcc, 0, [[LOAD]]
; CHECK-NEXT: s_and_b64 vcc, exec, vcc
; CHECK-NEXT: s_cbranch_vccnz [[LOOP_BB]]

; CHECK-NEXT: {{^}}[[EXIT]]:
; CHECK: s_or_b64 exec, exec, [[SAVEEXEC]]
; CHECK: buffer_store_dword
; CHECK: s_endpgm
define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  %cmp.var = fcmp olt float %var, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.var)
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  %loop.cond = icmp eq i32 %vgpr, 0
  br i1 %loop.cond, label %bb, label %exit

exit:
  store volatile i32 8, i32 addrspace(1)* undef
  ret void
}

; bug 28550
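; A value defined before the kill feeds a phi used after it, so the
; register holding the phi input must survive the exec update.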
; CHECK-LABEL: {{^}}phi_use_def_before_kill:
; CHECK: v_cndmask_b32_e64 [[PHIREG:v[0-9]+]], 0, -1.0,
; CHECK: v_cmpx_lt_f32_e32 vcc, 0,
; CHECK-NEXT: s_cbranch_execz [[EXITBB:BB[0-9]+_[0-9]+]]

; CHECK: ; %[[KILLBB:bb.[0-9]+]]:
; CHECK-NEXT: s_cbranch_scc0 [[PHIBB:BB[0-9]+_[0-9]+]]

; CHECK: [[PHIBB]]:
; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]]
; CHECK: s_cbranch_vccz [[ENDBB:BB[0-9]+_[0-9]+]]

; CHECK: ; %bb10
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 9
; CHECK: buffer_store_dword

; CHECK: [[ENDBB]]:
; CHECK-NEXT: s_endpgm

; CHECK: [[EXITBB]]:
; CHECK: exp null
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
bb:
  %tmp = fadd float %x, 1.000000e+00
  %tmp1 = fcmp olt float 0.000000e+00, %tmp
  %tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
  %cmp.tmp2 = fcmp olt float %tmp2, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.tmp2)
  br i1 undef, label %phibb, label %bb8

phibb:
  %tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ]
  %tmp6 = fcmp oeq float %tmp5, 0.000000e+00
  br i1 %tmp6, label %bb10, label %end

bb8:
  store volatile i32 8, i32 addrspace(1)* undef
  br label %phibb

bb10:
  store volatile i32 9, i32 addrspace(1)* undef
  br label %end

end:
  ret void
}

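; The block containing the kill ends in unreachable and has no successors,
; so there is nothing to skip past and no early-exit branch is needed.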
; CHECK-LABEL: {{^}}no_skip_no_successors:
; CHECK: v_cmp_nge_f32
; CHECK: s_cbranch_vccz [[SKIPKILL:BB[0-9]+_[0-9]+]]

; CHECK: ; %bb6
; CHECK: s_mov_b64 exec, 0

; CHECK: [[SKIPKILL]]:
; CHECK: v_cmp_nge_f32_e32 vcc
; CHECK: %bb.3: ; %bb5
; CHECK-NEXT: .Lfunc_end{{[0-9]+}}
define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 {
bb:
  %tmp = fcmp ult float %arg1, 0.000000e+00
  %tmp2 = fcmp ult float %arg, 0x3FCF5C2900000000
  br i1 %tmp, label %bb6, label %bb3

bb3:                                              ; preds = %bb
  br i1 %tmp2, label %bb5, label %bb4

bb4:                                              ; preds = %bb3
  br i1 true, label %bb5, label %bb7

bb5:                                              ; preds = %bb4, %bb3
  unreachable

bb6:                                              ; preds = %bb
  call void @llvm.amdgcn.kill(i1 false)
  unreachable

bb7:                                              ; preds = %bb4
  ret void
}

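; A kill under a divergent branch, with another if after the rejoin: exec
; is restored with s_or_b64 before the image_sample_c, and the later if
; gets its own skip branch to the end of the shader.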
; CHECK-LABEL: {{^}}if_after_kill_block:
; CHECK: ; %bb.0:
; CHECK: s_and_saveexec_b64
; CHECK: s_xor_b64

; CHECK: v_cmpx_gt_f32_e32 vcc, 0,
; CHECK: BB{{[0-9]+_[0-9]+}}:
; CHECK: s_or_b64 exec, exec
; CHECK: image_sample_c

; CHECK: v_cmp_neq_f32_e32 vcc, 0,
; CHECK: s_and_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc
; CHECK-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]]
; CHECK-NOT: branch

; CHECK: ; %bb.{{[0-9]+}}: ; %bb8
; CHECK: buffer_store_dword

; CHECK: [[END]]:
; CHECK: s_endpgm
define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 {
bb:
  %tmp = fcmp ult float %arg1, 0.000000e+00
  br i1 %tmp, label %bb3, label %bb4

bb3:                                              ; preds = %bb
  %cmp.arg = fcmp olt float %arg, 0.0
  call void @llvm.amdgcn.kill(i1 %cmp.arg)
  br label %bb4

bb4:                                              ; preds = %bb3, %bb
  %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
  %tmp6 = extractelement <4 x float> %tmp5, i32 0
  %tmp7 = fcmp une float %tmp6, 0.000000e+00
  br i1 %tmp7, label %bb8, label %bb9

bb8:                                              ; preds = %bb4
  store volatile i32 9, i32 addrspace(1)* undef
  ret void

bb9:                                              ; preds = %bb4
  ret void
}

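; A kill block that rejoins at an export block: the skip branch to the
; null-export early exit is placed right after the exec mask is restored.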
; CHECK-LABEL: {{^}}cbranch_kill:
; CHECK: ; %bb.{{[0-9]+}}: ; %export
; CHECK-NEXT: s_or_b64
; CHECK-NEXT: s_cbranch_execz [[EXIT:BB[0-9]+_[0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: exp null off, off, off, off done vm
define amdgpu_ps void @cbranch_kill(i32 inreg %0, <2 x float> %1) {
.entry:
  %val0 = extractelement <2 x float> %1, i32 0
  %val1 = extractelement <2 x float> %1, i32 1
  %p0 = call float @llvm.amdgcn.interp.p1(float %val0, i32 immarg 0, i32 immarg 1, i32 %0) #2
  %sample = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 1, float %p0, float %p0, float %p0, float 0.000000e+00, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
  %cond0 = fcmp ugt float %sample, 0.000000e+00
  br i1 %cond0, label %live, label %kill

kill:
  call void @llvm.amdgcn.kill(i1 false)
  br label %export

live:
  %i0 = call float @llvm.amdgcn.interp.p1(float %val0, i32 immarg 0, i32 immarg 0, i32 %0) #2
  %i1 = call float @llvm.amdgcn.interp.p2(float %i0, float %val1, i32 immarg 0, i32 immarg 0, i32 %0) #2
  %i2 = call float @llvm.amdgcn.interp.p1(float %val0, i32 immarg 1, i32 immarg 0, i32 %0) #2
  %i3 = call float @llvm.amdgcn.interp.p2(float %i2, float %val1, i32 immarg 1, i32 immarg 0, i32 %0) #2
  %scale.i0 = fmul reassoc nnan nsz arcp contract float %i0, %sample
  %scale.i1 = fmul reassoc nnan nsz arcp contract float %i1, %sample
  %scale.i2 = fmul reassoc nnan nsz arcp contract float %i2, %sample
  %scale.i3 = fmul reassoc nnan nsz arcp contract float %i3, %sample
  br label %export

export:
  %proxy.0.0 = phi float [ undef, %kill ], [ %scale.i0, %live ]
  %proxy.0.1 = phi float [ undef, %kill ], [ %scale.i1, %live ]
  %proxy.0.2 = phi float [ undef, %kill ], [ %scale.i2, %live ]
  %proxy.0.3 = phi float [ undef, %kill ], [ %scale.i3, %live ]
  %out.0 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %proxy.0.0, float %proxy.0.1) #2
  %out.1 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %proxy.0.2, float %proxy.0.3) #2
  call void @llvm.amdgcn.exp.compr.v2f16(i32 immarg 0, i32 immarg 15, <2 x half> %out.0, <2 x half> %out.1, i1 immarg true, i1 immarg true) #3
  ret void
}

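; A kill inside a loop in a shader that ends with its own export: exec is
; cleared, but no extra null export is expected.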
; CHECK-LABEL: {{^}}complex_loop:
; CHECK: s_mov_b64 exec, 0
; CHECK-NOT: exp null
define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) {
.entry:
  %flaga = icmp sgt i32 %cmpa, 0
  br i1 %flaga, label %.lr.ph, label %._crit_edge

.lr.ph:
  br label %hdr

hdr:
  %ctr = phi i32 [ 0, %.lr.ph ], [ %ctr.next, %latch ]
  %flagb = icmp ugt i32 %ctr, %cmpb
  br i1 %flagb, label %kill, label %latch

kill:
  call void @llvm.amdgcn.kill(i1 false)
  br label %latch

latch:
  %ctr.next = add nuw nsw i32 %ctr, 1
  %flagc = icmp slt i32 %ctr.next, %cmpc
  br i1 %flagc, label %hdr, label %._crit_edge

._crit_edge:
  %tmp = phi i32 [ -1, %.entry ], [ %ctr.next, %latch ]
  %out = bitcast i32 %tmp to <2 x half>
  call void @llvm.amdgcn.exp.compr.v2f16(i32 immarg 0, i32 immarg 15, <2 x half> %out, <2 x half> undef, i1 immarg true, i1 immarg true)
  ret void
}

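; Skipping a block that only sets a hardware mode register: the block is
; guarded like any divergent block, with s_and_saveexec and a skip branch,
; and exec restored afterwards.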
; CHECK-LABEL: {{^}}skip_mode_switch:
; CHECK: s_and_saveexec_b64
; CHECK-NEXT: s_cbranch_execz
; CHECK: s_setreg_imm32
; CHECK: s_or_b64 exec, exec
define void @skip_mode_switch(i32 %arg) {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb.0, label %bb.1

bb.0:
  call void @llvm.amdgcn.s.setreg(i32 2049, i32 3)
  br label %bb.1

bb.1:
  ret void
}

declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #2
declare float @llvm.amdgcn.interp.p2(float, float, i32 immarg, i32 immarg, i32) #2
declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) #3
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #2
declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare void @llvm.amdgcn.kill(i1) #0

declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)

attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { inaccessiblememonly nounwind writeonly }