; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.break

; OPT-LABEL: @break_loop(
; OPT: bb1:
; OPT: call i64 @llvm.amdgcn.break(i64
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT: load volatile
; OPT: xor i1 %cmp1
; OPT: call i64 @llvm.amdgcn.if.break(
; OPT: br label %Flow

; OPT: Flow:
; OPT: call i1 @llvm.amdgcn.loop(i64
; OPT: br i1 %{{[0-9]+}}, label %bb9, label %bb1

; OPT: bb9:
; OPT: call void @llvm.amdgcn.end.cf(i64

; TODO: Can remove exec fixes in return block
; GCN-LABEL: {{^}}break_loop:
; GCN: s_mov_b64 [[INITMASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}

; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1
; GCN: s_or_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INITMASK]]
; GCN: v_cmp_lt_i32_e32 vcc, -1
; GCN: s_and_b64 vcc, exec, vcc
; GCN-NEXT: s_cbranch_vccnz [[FLOW:BB[0-9]+_[0-9]+]]

; GCN: ; %bb.2: ; %bb4
; GCN: buffer_load_dword
; GCN: v_cmp_ge_i32_e32 vcc,
; GCN: s_or_b64 [[MASK]], vcc, [[INITMASK]]

; GCN: [[FLOW]]:
; GCN: s_mov_b64 [[INITMASK]], [[MASK]]
; GCN: s_andn2_b64 exec, exec, [[MASK]]
; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]]

; GCN: ; %bb.4: ; %bb9
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.if.break(i1 undef, i64 %phi.broken)
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef

; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.if.break(i1 icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), i64 %phi.broken)
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; OPT-LABEL: @true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.break(i64 %phi.broken)
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; OPT-LABEL: @false_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NOT: call
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %0, %bb4 ], [ %phi.broken, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9: ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap order of branches in flow block so that the true phi is
; continue.

; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %1, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = xor i1 %tmp3, true
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %0, i64 %phi.broken)
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %1)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %1)
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1: ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4: ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow: ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb1, label %bb9

bb9: ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }