; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=FLAT %s
;
; Control-flow tests for the amdgcn target. Every function below is compiled by
; both llc invocations above: -mcpu=verde output is matched by the assertions
; carrying the SI prefix, and -mcpu=tonga -mattr=-flat-for-global output by the
; assertions carrying the FLAT prefix. The assertion lines are autogenerated
; (see the first line of this file); regenerate them with that script rather
; than editing them by hand.

; A self-looping block (%ENDIF) whose exit condition %1 is computed once in
; %main_body, before the loop is entered: it is the low bit of
; (%a & mbcnt.lo(-1, 0)). Once the loop is left, %ENDLOOP stores 0 to %out.
; The expected code accumulates the exit mask with s_and_b64 / s_or_b64, removes
; those lanes from exec with s_andn2_b64, loops on s_cbranch_execnz, and ORs the
; accumulated mask back into exec at the start of %ENDLOOP.
define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) {
; SI-LABEL: break_inserted_outside_of_loop:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT: s_load_dword s0, s[0:1], 0xb
; SI-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_and_b32_e32 v0, s0, v0
; SI-NEXT: v_and_b32_e32 v0, 1, v0
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; SI-NEXT: s_mov_b64 s[0:1], 0
; SI-NEXT: BB0_1: ; %ENDIF
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_and_b64 s[2:3], exec, vcc
; SI-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; SI-NEXT: s_andn2_b64 exec, exec, s[0:1]
; SI-NEXT: s_cbranch_execnz BB0_1
; SI-NEXT: ; %bb.2: ; %ENDLOOP
; SI-NEXT: s_or_b64 exec, exec, s[0:1]
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, 0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; FLAT-LABEL: break_inserted_outside_of_loop:
; FLAT: ; %bb.0: ; %main_body
; FLAT-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; FLAT-NEXT: s_load_dword s0, s[0:1], 0x2c
; FLAT-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0
; FLAT-NEXT: s_waitcnt lgkmcnt(0)
; FLAT-NEXT: v_and_b32_e32 v0, s0, v0
; FLAT-NEXT: v_and_b32_e32 v0, 1, v0
; FLAT-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; FLAT-NEXT: s_mov_b64 s[0:1], 0
; FLAT-NEXT: BB0_1: ; %ENDIF
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT: s_and_b64 s[2:3], exec, vcc
; FLAT-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; FLAT-NEXT: s_andn2_b64 exec, exec, s[0:1]
; FLAT-NEXT: s_cbranch_execnz BB0_1
; FLAT-NEXT: ; %bb.2: ; %ENDLOOP
; FLAT-NEXT: s_or_b64 exec, exec, s[0:1]
; FLAT-NEXT: s_mov_b32 s7, 0xf000
; FLAT-NEXT: s_mov_b32 s6, -1
; FLAT-NEXT: v_mov_b32_e32 v0, 0
; FLAT-NEXT: buffer_store_dword v0, off, s[4:7], 0
; FLAT-NEXT: s_endpgm
main_body:
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %0 = and i32 %a, %tid
  %1 = trunc i32 %0 to i1
  br label %ENDIF

ENDLOOP:
  store i32 0, i32 addrspace(1)* %out
  ret void

ENDIF:
  br i1 %1, label %ENDLOOP, label %ENDIF
}

; The exit condition of %loop is the phi %2, which is defined in %endif, i.e.
; outside of the loop. %2 is false when control arrives from %if and (%b == 0)
; when it arrives from %else; %if is taken when mbcnt.lo(-1, 0) equals 0.
; The expected code guards %else with s_and_saveexec_b64 and then runs the same
; s_and_b64 / s_or_b64 / s_andn2_b64 exec-mask loop as the previous test.
define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
; SI-LABEL: phi_cond_outside_loop:
; SI: ; %bb.0: ; %entry
; SI-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: s_mov_b64 s[4:5], 0
; SI-NEXT: s_and_saveexec_b64 s[6:7], vcc
; SI-NEXT: s_cbranch_execz BB1_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_load_dword s0, s[0:1], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], s0, 0
; SI-NEXT: s_and_b64 s[4:5], s[0:1], exec
; SI-NEXT: BB1_2: ; %endif
; SI-NEXT: s_or_b64 exec, exec, s[6:7]
; SI-NEXT: BB1_3: ; %loop
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_and_b64 s[0:1], exec, s[4:5]
; SI-NEXT: s_or_b64 s[2:3], s[0:1], s[2:3]
; SI-NEXT: s_andn2_b64 exec, exec, s[2:3]
; SI-NEXT: s_cbranch_execnz BB1_3
; SI-NEXT: ; %bb.4: ; %exit
; SI-NEXT: s_endpgm
;
; FLAT-LABEL: phi_cond_outside_loop:
; FLAT: ; %bb.0: ; %entry
; FLAT-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0
; FLAT-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; FLAT-NEXT: s_mov_b64 s[2:3], 0
; FLAT-NEXT: s_mov_b64 s[4:5], 0
; FLAT-NEXT: s_and_saveexec_b64 s[6:7], vcc
; FLAT-NEXT: s_cbranch_execz BB1_2
; FLAT-NEXT: ; %bb.1: ; %else
; FLAT-NEXT: s_load_dword s0, s[0:1], 0x24
; FLAT-NEXT: s_waitcnt lgkmcnt(0)
; FLAT-NEXT: v_cmp_eq_u32_e64 s[0:1], s0, 0
; FLAT-NEXT: s_and_b64 s[4:5], s[0:1], exec
; FLAT-NEXT: BB1_2: ; %endif
; FLAT-NEXT: s_or_b64 exec, exec, s[6:7]
; FLAT-NEXT: BB1_3: ; %loop
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT: s_and_b64 s[0:1], exec, s[4:5]
; FLAT-NEXT: s_or_b64 s[2:3], s[0:1], s[2:3]
; FLAT-NEXT: s_andn2_b64 exec, exec, s[2:3]
; FLAT-NEXT: s_cbranch_execnz BB1_3
; FLAT-NEXT: ; %bb.4: ; %exit
; FLAT-NEXT: s_endpgm
entry:
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %0 = icmp eq i32 %tid , 0
  br i1 %0, label %if, label %else

if:
  br label %endif

else:
  %1 = icmp eq i32 %b, 0
  br label %endif

endif:
  %2 = phi i1 [0, %if], [%1, %else]
  br label %loop

loop:
  br i1 %2, label %exit, label %loop

exit:
  ret void
}

; Every destination of the switch on %x (%sw.bb and %sw.default) ends in
; 'unreachable', and %sw.epilog has no predecessors. Only the function label and
; the entry-block comment are matched, so no particular instruction sequence is
; required here.
; NOTE(review): presumably a crash / machine-verifier regression test; confirm
; against the commit that introduced it.
define amdgpu_kernel void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
; SI-LABEL: switch_unreachable:
; SI: ; %bb.0: ; %centry
;
; FLAT-LABEL: switch_unreachable:
; FLAT: ; %bb.0: ; %centry
centry:
  switch i32 %x, label %sw.default [
    i32 0, label %sw.bb
    i32 60, label %sw.bb
  ]

sw.bb:
  unreachable

sw.default:
  unreachable

sw.epilog:
  ret void
}

; Used by @loop_land_info_assert below.
declare float @llvm.fabs.f32(float) nounwind readnone

; Nested loops with several exits:
;   * %while.cond continues to %convex.exit when %c1 < 4, otherwise it leaves
;     for %for.cond.
;   * %convex.exit goes to %return when (%c0 > 0) | (%c1 < 4), otherwise to
;     %if.end.
;   * %if.end goes to %if.else when |%tmp| < 0x3E80000000000000 (2^-23, which
;     the expected code materializes with s_brev_b32 s9, 44), otherwise it
;     restarts the outer loop at %while.cond.outer, which reloads %tmp.
;   * %if.else performs a volatile store of 3 and jumps back to %while.cond.
;   * %self.loop branches only to itself.
; Both the load and the volatile store use an undef address.
; NOTE(review): judging only by the function name, this looks like a regression
; test for an assertion failure in control-flow handling; confirm against the
; commit history.
define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
; SI-LABEL: loop_land_info_assert:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0
; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
; SI-NEXT: s_load_dword s8, s[0:1], 0xc
; SI-NEXT: s_brev_b32 s9, 44
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], s2, 1
; SI-NEXT: v_cmp_lt_i32_e64 s[4:5], s3, 4
; SI-NEXT: v_cmp_gt_i32_e64 s[2:3], s3, 3
; SI-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
; SI-NEXT: s_and_b64 s[0:1], exec, s[4:5]
; SI-NEXT: s_and_b64 s[2:3], exec, s[2:3]
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s9
; SI-NEXT: s_and_b64 s[4:5], exec, s[4:5]
; SI-NEXT: v_mov_b32_e32 v0, 3
; SI-NEXT: s_branch BB3_4
; SI-NEXT: BB3_1: ; %Flow6
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[10:11], 0
; SI-NEXT: BB3_2: ; %Flow5
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[14:15], 0
; SI-NEXT: BB3_3: ; %Flow
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_and_b64 vcc, exec, s[12:13]
; SI-NEXT: s_cbranch_vccnz BB3_8
; SI-NEXT: BB3_4: ; %while.cond
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_mov_b64 s[14:15], -1
; SI-NEXT: s_mov_b64 s[10:11], -1
; SI-NEXT: s_mov_b64 s[12:13], -1
; SI-NEXT: s_mov_b64 vcc, s[0:1]
; SI-NEXT: s_cbranch_vccz BB3_3
; SI-NEXT: ; %bb.5: ; %convex.exit
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[10:11], -1
; SI-NEXT: s_mov_b64 s[12:13], -1
; SI-NEXT: s_mov_b64 vcc, s[2:3]
; SI-NEXT: s_cbranch_vccz BB3_2
; SI-NEXT: ; %bb.6: ; %if.end
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[12:13], -1
; SI-NEXT: s_mov_b64 vcc, s[4:5]
; SI-NEXT: s_cbranch_vccz BB3_1
; SI-NEXT: ; %bb.7: ; %if.else
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_mov_b64 s[12:13], 0
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_branch BB3_1
; SI-NEXT: BB3_8: ; %loop.exit.guard4
; SI-NEXT: ; in Loop: Header=BB3_4 Depth=1
; SI-NEXT: s_and_b64 vcc, exec, s[10:11]
; SI-NEXT: s_cbranch_vccz BB3_4
; SI-NEXT: ; %bb.9: ; %loop.exit.guard
; SI-NEXT: s_and_b64 vcc, exec, s[14:15]
; SI-NEXT: s_cbranch_vccz BB3_13
; SI-NEXT: ; %bb.10: ; %for.cond.preheader
; SI-NEXT: s_cmpk_lt_i32 s8, 0x3e8
; SI-NEXT: s_cbranch_scc0 BB3_13
; SI-NEXT: ; %bb.11: ; %for.body
; SI-NEXT: s_and_b64 vcc, exec, 0
; SI-NEXT: BB3_12: ; %self.loop
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_mov_b64 vcc, vcc
; SI-NEXT: s_cbranch_vccz BB3_12
; SI-NEXT: BB3_13: ; %DummyReturnBlock
; SI-NEXT: s_endpgm
;
; FLAT-LABEL: loop_land_info_assert:
; FLAT: ; %bb.0: ; %entry
; FLAT-NEXT: s_mov_b32 s7, 0xf000
; FLAT-NEXT: s_mov_b32 s6, -1
; FLAT-NEXT: buffer_load_dword v0, off, s[4:7], 0
; FLAT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; FLAT-NEXT: s_load_dword s8, s[0:1], 0x30
; FLAT-NEXT: s_brev_b32 s9, 44
; FLAT-NEXT: s_waitcnt lgkmcnt(0)
; FLAT-NEXT: v_cmp_lt_i32_e64 s[0:1], s2, 1
; FLAT-NEXT: v_cmp_lt_i32_e64 s[4:5], s3, 4
; FLAT-NEXT: v_cmp_gt_i32_e64 s[2:3], s3, 3
; FLAT-NEXT: s_and_b64 s[2:3], s[0:1], s[2:3]
; FLAT-NEXT: s_and_b64 s[0:1], exec, s[4:5]
; FLAT-NEXT: s_and_b64 s[2:3], exec, s[2:3]
; FLAT-NEXT: s_waitcnt vmcnt(0)
; FLAT-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s9
; FLAT-NEXT: s_and_b64 s[4:5], exec, s[4:5]
; FLAT-NEXT: v_mov_b32_e32 v0, 3
; FLAT-NEXT: s_branch BB3_4
; FLAT-NEXT: BB3_1: ; %Flow6
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[10:11], 0
; FLAT-NEXT: BB3_2: ; %Flow5
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[14:15], 0
; FLAT-NEXT: BB3_3: ; %Flow
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_and_b64 vcc, exec, s[12:13]
; FLAT-NEXT: s_cbranch_vccnz BB3_8
; FLAT-NEXT: BB3_4: ; %while.cond
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT: s_mov_b64 s[14:15], -1
; FLAT-NEXT: s_mov_b64 s[10:11], -1
; FLAT-NEXT: s_mov_b64 s[12:13], -1
; FLAT-NEXT: s_mov_b64 vcc, s[0:1]
; FLAT-NEXT: s_cbranch_vccz BB3_3
; FLAT-NEXT: ; %bb.5: ; %convex.exit
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[10:11], -1
; FLAT-NEXT: s_mov_b64 s[12:13], -1
; FLAT-NEXT: s_mov_b64 vcc, s[2:3]
; FLAT-NEXT: s_cbranch_vccz BB3_2
; FLAT-NEXT: ; %bb.6: ; %if.end
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[12:13], -1
; FLAT-NEXT: s_mov_b64 vcc, s[4:5]
; FLAT-NEXT: s_cbranch_vccz BB3_1
; FLAT-NEXT: ; %bb.7: ; %if.else
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_mov_b64 s[12:13], 0
; FLAT-NEXT: buffer_store_dword v0, off, s[4:7], 0
; FLAT-NEXT: s_branch BB3_1
; FLAT-NEXT: BB3_8: ; %loop.exit.guard4
; FLAT-NEXT: ; in Loop: Header=BB3_4 Depth=1
; FLAT-NEXT: s_and_b64 vcc, exec, s[10:11]
; FLAT-NEXT: s_cbranch_vccz BB3_4
; FLAT-NEXT: ; %bb.9: ; %loop.exit.guard
; FLAT-NEXT: s_and_b64 vcc, exec, s[14:15]
; FLAT-NEXT: s_cbranch_vccz BB3_13
; FLAT-NEXT: ; %bb.10: ; %for.cond.preheader
; FLAT-NEXT: s_cmpk_lt_i32 s8, 0x3e8
; FLAT-NEXT: s_cbranch_scc0 BB3_13
; FLAT-NEXT: ; %bb.11: ; %for.body
; FLAT-NEXT: s_and_b64 vcc, exec, 0
; FLAT-NEXT: BB3_12: ; %self.loop
; FLAT-NEXT: ; =>This Inner Loop Header: Depth=1
; FLAT-NEXT: s_mov_b64 vcc, vcc
; FLAT-NEXT: s_cbranch_vccz BB3_12
; FLAT-NEXT: BB3_13: ; %DummyReturnBlock
; FLAT-NEXT: s_endpgm
entry:
  %cmp = icmp sgt i32 %c0, 0
  br label %while.cond.outer

while.cond.outer:
  %tmp = load float, float addrspace(1)* undef
  br label %while.cond

while.cond:
  %cmp1 = icmp slt i32 %c1, 4
  br i1 %cmp1, label %convex.exit, label %for.cond

convex.exit:
  %or = or i1 %cmp, %cmp1
  br i1 %or, label %return, label %if.end

if.end:
  %tmp3 = call float @llvm.fabs.f32(float %tmp) nounwind readnone
  %cmp2 = fcmp olt float %tmp3, 0x3E80000000000000
  br i1 %cmp2, label %if.else, label %while.cond.outer

if.else:
  store volatile i32 3, i32 addrspace(1)* undef, align 4
  br label %while.cond

for.cond:
  %cmp3 = icmp slt i32 %c3, 1000
  br i1 %cmp3, label %for.body, label %return

for.body:
  br i1 %cmp3, label %self.loop, label %if.end.2

if.end.2:
  %or.cond2 = or i1 %cmp3, %arg
  br i1 %or.cond2, label %return, label %for.cond

self.loop:
  br label %self.loop

return:
  ret void
}

; Called by @break_inserted_outside_of_loop and @phi_cond_outside_loop to
; obtain a per-lane value that the loop conditions depend on.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0

; Attribute group attached to the mbcnt.lo declaration and its call sites.
attributes #0 = { nounwind readnone }