; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s

; This file checks how the SI backend lowers llvm.AMDGPU.kill: an
; immediate-negative kill becomes "s_mov_b64 exec, 0", a variable kill
; becomes v_cmpx, and a "skip if all lanes dead" branch (s_cbranch_execnz
; over an exp/s_endpgm early-exit block) is only inserted when enough
; instructions follow the kill to make skipping worthwhile.

; CHECK-LABEL: {{^}}test_kill_depth_0_imm_pos:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
  call void @llvm.AMDGPU.kill(float 0.0)
  ret void
}

; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
  call void @llvm.AMDGPU.kill(float -0.0)
  ret void
}

; FIXME: Ideally only one would be emitted
; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg_x2:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: ; %bb.2:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
  call void @llvm.AMDGPU.kill(float -0.0)
  call void @llvm.AMDGPU.kill(float -1.0)
  ret void
}

; CHECK-LABEL: {{^}}test_kill_depth_var:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  ret void
}

; FIXME: Ideally only one would be emitted
; CHECK-LABEL: {{^}}test_kill_depth_var_x2_same:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; %bb.2:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  call void @llvm.AMDGPU.kill(float %x)
  ret void
}

; CHECK-LABEL: {{^}}test_kill_depth_var_x2:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v1
; CHECK-NEXT: ; %bb.2:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  call void @llvm.AMDGPU.kill(float %y)
  ret void
}

; CHECK-LABEL: {{^}}test_kill_depth_var_x2_instructions:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: s_cbranch_execnz BB6_2
; CHECK-NEXT: ; %bb.1:
; CHECK-NEXT: exp
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB6_2:
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
; CHECK-NEXT: s_cbranch_execnz BB6_4
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: exp
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB6_4:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
  call void @llvm.AMDGPU.kill(float %y)
  ret void
}

; FIXME: why does the skip depend on the asm length in the same block?

; CHECK-LABEL: {{^}}test_kill_control_flow:
; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0
; CHECK: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]

; CHECK-NEXT: ; %bb.1:
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64

; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
; CHECK-NEXT: ; %bb.2:
; CHECK-NEXT: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm

; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_control_flow(i32 inreg %arg) #0 {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  call void @llvm.AMDGPU.kill(float %var)
  br label %exit

exit:
  ret void
}

; CHECK-LABEL: {{^}}test_kill_control_flow_remainder:
; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0
; CHECK-NEXT: v_mov_b32_e32 v{{[0-9]+}}, 0
; CHECK-NEXT: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]

; CHECK-NEXT: ; %bb.1: ; %bb
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: ;;#ASMEND
; CHECK: v_mov_b32_e64 v8, -1
; CHECK: ;;#ASMEND
; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]

; CHECK-NEXT: ; %bb.2:
; CHECK-NEXT: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm

; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
; CHECK: buffer_store_dword v8
; CHECK: v_mov_b32_e64 v9, -2

; CHECK: {{^}}BB{{[0-9]+_[0-9]+}}:
; CHECK: buffer_store_dword v9
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"()
  call void @llvm.AMDGPU.kill(float %var)
  store volatile float %live.across, float addrspace(1)* undef
  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"()
  br label %exit

exit:
  %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
  store float %phi, float addrspace(1)* undef
  ret void
}

; CHECK-LABEL: {{^}}test_kill_divergent_loop:
; CHECK: v_cmp_eq_u32_e32 vcc, 0, v0
; CHECK-NEXT: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], vcc
; CHECK-NEXT: s_xor_b64 [[SAVEEXEC]], exec, [[SAVEEXEC]]
; CHECK-NEXT: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
; CHECK-NEXT: s_cbranch_execz [[EXIT]]

; CHECK: {{BB[0-9]+_[0-9]+}}: ; %bb.preheader
; CHECK: s_mov_b32

; CHECK: [[LOOP_BB:BB[0-9]+_[0-9]+]]:

; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_nop_e64
; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7

; CHECK-NEXT: ; %bb.3:
; CHECK: buffer_load_dword [[LOAD:v[0-9]+]]
; CHECK: v_cmp_eq_u32_e32 vcc, 0, [[LOAD]]
; CHECK-NEXT: s_and_b64 vcc, exec, vcc
; CHECK-NEXT: s_cbranch_vccnz [[LOOP_BB]]

; CHECK-NEXT: {{^}}[[EXIT]]:
; CHECK: s_or_b64 exec, exec, [[SAVEEXEC]]
; CHECK: buffer_store_dword
; CHECK: s_endpgm
define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
  call void @llvm.AMDGPU.kill(float %var)
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  %loop.cond = icmp eq i32 %vgpr, 0
  br i1 %loop.cond, label %bb, label %exit

exit:
  store volatile i32 8, i32 addrspace(1)* undef
  ret void
}

; bug 28550
; CHECK-LABEL: {{^}}phi_use_def_before_kill:
; CHECK: v_cndmask_b32_e64 [[PHIREG:v[0-9]+]], 0, -1.0,
; CHECK: v_cmpx_le_f32_e32 vcc, 0,
; CHECK-NEXT: s_cbranch_execnz [[BB4:BB[0-9]+_[0-9]+]]

; CHECK: exp
; CHECK-NEXT: s_endpgm

; CHECK: [[KILLBB:BB[0-9]+_[0-9]+]]:
; CHECK-NEXT: s_cbranch_scc0 [[PHIBB:BB[0-9]+_[0-9]+]]

; CHECK: [[PHIBB]]:
; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]]
; CHECK: s_cbranch_vccz [[ENDBB:BB[0-9]+_[0-9]+]]

; CHECK: ; %bb10
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 9
; CHECK: buffer_store_dword

; CHECK: [[ENDBB]]:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
bb:
  %tmp = fadd float %x, 1.000000e+00
  %tmp1 = fcmp olt float 0.000000e+00, %tmp
  %tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
  call void @llvm.AMDGPU.kill(float %tmp2)
  br i1 undef, label %phibb, label %bb8

phibb:
  %tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ]
  %tmp6 = fcmp oeq float %tmp5, 0.000000e+00
  br i1 %tmp6, label %bb10, label %end

bb8:
  store volatile i32 8, i32 addrspace(1)* undef
  br label %phibb

bb10:
  store volatile i32 9, i32 addrspace(1)* undef
  br label %end

end:
  ret void
}

; CHECK-LABEL: {{^}}no_skip_no_successors:
; CHECK: v_cmp_nge_f32
; CHECK: s_cbranch_vccz [[SKIPKILL:BB[0-9]+_[0-9]+]]

; CHECK: ; %bb6
; CHECK: s_mov_b64 exec, 0

; CHECK: [[SKIPKILL]]:
; CHECK: v_cmp_nge_f32_e32 vcc
; CHECK: %bb.3: ; %bb5
; CHECK-NEXT: .Lfunc_end{{[0-9]+}}
define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 {
bb:
  %tmp = fcmp ult float %arg1, 0.000000e+00
  %tmp2 = fcmp ult float %arg, 0x3FCF5C2900000000
  br i1 %tmp, label %bb6, label %bb3

bb3:                                              ; preds = %bb
  br i1 %tmp2, label %bb5, label %bb4

bb4:                                              ; preds = %bb3
  br i1 true, label %bb5, label %bb7

bb5:                                              ; preds = %bb4, %bb3
  unreachable

bb6:                                              ; preds = %bb
  call void @llvm.AMDGPU.kill(float -1.000000e+00)
  unreachable

bb7:                                              ; preds = %bb4
  ret void
}

; CHECK-LABEL: {{^}}if_after_kill_block:
; CHECK: ; %bb.0:
; CHECK: s_and_saveexec_b64
; CHECK: s_xor_b64
; CHECK-NEXT: mask branch [[BB4:BB[0-9]+_[0-9]+]]

; CHECK: v_cmpx_le_f32_e32 vcc, 0,
; CHECK: [[BB4]]:
; CHECK: s_or_b64 exec, exec
; CHECK: image_sample_c

; CHECK: v_cmp_neq_f32_e32 vcc, 0,
; CHECK: s_and_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc
; CHECK: mask branch [[END:BB[0-9]+_[0-9]+]]
; CHECK-NOT: branch

; CHECK: BB{{[0-9]+_[0-9]+}}: ; %bb8
; CHECK: buffer_store_dword

; CHECK: [[END]]:
; CHECK: s_endpgm
define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 {
bb:
  %tmp = fcmp ult float %arg1, 0.000000e+00
  br i1 %tmp, label %bb3, label %bb4

bb3:                                              ; preds = %bb
  call void @llvm.AMDGPU.kill(float %arg)
  br label %bb4

bb4:                                              ; preds = %bb3, %bb
  %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
  %tmp6 = extractelement <4 x float> %tmp5, i32 0
  %tmp7 = fcmp une float %tmp6, 0.000000e+00
  br i1 %tmp7, label %bb8, label %bb9

bb8:                                              ; preds = %bb9, %bb4
  store volatile i32 9, i32 addrspace(1)* undef
  ret void

bb9:                                              ; preds = %bb4
  ret void
}

declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare void @llvm.AMDGPU.kill(float) #0

attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }