; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | \
; RUN:   FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi %s -o - | \
; RUN:   FileCheck %s --check-prefix=CHECK-LLC
; RUN: opt -mtriple=thumbv8.1m.main -loop-unroll -unroll-remainder=false -S < %s | \
; RUN:   llc -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-UNROLL
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops \
; RUN:   -pass-remarks-analysis=hardware-loops %s -S -o - 2>&1 | \
; RUN:   FileCheck %s --check-prefix=CHECK-REMARKS

; Structural tests for hardware-loop formation on thumbv8.1m.main. Each
; function below exercises one loop shape; the directives next to it state
; whether the loop intrinsics (IR run), the dls/wls/le instructions (llc run)
; or the unrolled code (loop-unroll run) are expected to appear.
;
; The remark expectations that follow are matched, in order, against the
; diagnostics printed by the -pass-remarks-analysis run.

; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop


; Loop with two exits: do.body can leave directly to do.end, and if.end
; leaves through the critical-edge block. No loop intrinsics are expected.
; CHECK-LABEL: early_exit
; CHECK-NOT: llvm.set.loop.iterations
; CHECK-NOT: llvm.loop.decrement
define i32 @early_exit(i32* nocapture readonly %a, i32 %max, i32 %n) {
entry:
  br label %do.body

do.body:
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %if.end ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.0
  %0 = load i32, i32* %arrayidx, align 4
  %cmp = icmp sgt i32 %0, %max
  br i1 %cmp, label %do.end, label %if.end

if.end:
  %inc = add nuw i32 %i.0, 1
  %cmp1 = icmp ult i32 %inc, %n
  br i1 %cmp1, label %do.body, label %if.end.do.end_crit_edge

if.end.do.end_crit_edge:
  %arrayidx2.phi.trans.insert = getelementptr inbounds i32, i32* %a, i32 %inc
  %.pre = load i32, i32* %arrayidx2.phi.trans.insert, align 4
  br label %do.end

do.end:
  %1 = phi i32 [ %.pre, %if.end.do.end_crit_edge ], [ %0, %do.body ]
  ret i32 %1
}

; Two-deep loop nest. The expectations below require the start/decrement
; intrinsics for the inner loop (while.body3.us) only, and none for the outer
; loop's back edge.
; CHECK-LABEL: nested
; CHECK-NOT: call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us

; CHECK: [[START:%[^ ]+]] = call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: br label %while.body3.us

; CHECK: [[REM:%[^ ]+]] = phi i32 [ [[START]], %while.cond1.preheader.us ], [ [[LOOP_DEC:%[^ ]+]], %while.body3.us ]
; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[REM]], i32 1)
; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK: br i1 [[CMP]], label %while.body3.us, label %while.cond1.while.end_crit_edge.us

; CHECK-NOT: [[LOOP_DEC1:%[^ ]+]] = call i1 @llvm.loop.decrement.i32(i32 1)
; CHECK-NOT: br i1 [[LOOP_DEC1]], label %while.cond1.preheader.us, label %while.end7

; CHECK-LLC: nested:
; CHECK-LLC-NOT: mov lr, r1
; CHECK-LLC: dls lr, r1
; CHECK-LLC-NOT: mov lr, r1
; CHECK-LLC: [[LOOP_HEADER:\.LBB[0-9._]+]]:
; CHECK-LLC: le lr, [[LOOP_HEADER]]
; CHECK-LLC-NOT: b [[LOOP_EXIT:\.LBB[0-9._]+]]
; CHECK-LLC: [[LOOP_EXIT:\.LBB[0-9._]+]]:

define void @nested(i32* nocapture %A, i32 %N) {
entry:
  %cmp20 = icmp eq i32 %N, 0
  br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us

while.cond1.preheader.us:
  %i.021.us = phi i32 [ %inc6.us, %while.cond1.while.end_crit_edge.us ], [ 0, %entry ]
  %mul.us = mul i32 %i.021.us, %N
  br label %while.body3.us

while.body3.us:
  %j.019.us = phi i32 [ 0, %while.cond1.preheader.us ], [ %inc.us, %while.body3.us ]
  %add.us = add i32 %j.019.us, %mul.us
  %arrayidx.us = getelementptr inbounds i32, i32* %A, i32 %add.us
  store i32 %add.us, i32* %arrayidx.us, align 4
  %inc.us = add nuw i32 %j.019.us, 1
  %exitcond = icmp eq i32 %inc.us, %N
  br i1 %exitcond, label %while.cond1.while.end_crit_edge.us, label %while.body3.us

while.cond1.while.end_crit_edge.us:
  %inc6.us = add nuw i32 %i.021.us, 1
  %exitcond23 = icmp eq i32 %inc6.us, %N
  br i1 %exitcond23, label %while.end7, label %while.cond1.preheader.us

while.end7:
  ret void
}

; The input already contains start.loop.iterations / loop.decrement.reg calls.
; Exactly one of each is expected in the output, i.e. none may be added.
; CHECK-LABEL: pre_existing
; CHECK: llvm.start.loop.iterations
; CHECK-NOT: llvm.start.loop.iterations
; CHECK: call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
; CHECK-NOT: call i32 @llvm.loop.decrement.reg
define i32 @pre_existing(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
entry:
  %start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
  br label %while.body

while.body:                                       ; preds = %while.body, %entry
  %q.addr.05 = phi i32* [ %incdec.ptr, %while.body ], [ %q, %entry ]
  %p.addr.04 = phi i32* [ %incdec.ptr1, %while.body ], [ %p, %entry ]
  %0 = phi i32 [ %start, %entry ], [ %2, %while.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %q.addr.05, i32 1
  %1 = load i32, i32* %q.addr.05, align 4
  %incdec.ptr1 = getelementptr inbounds i32, i32* %p.addr.04, i32 1
  store i32 %1, i32* %p.addr.04, align 4
  %2 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
  %3 = icmp ne i32 %2, 0
  br i1 %3, label %while.body, label %while.end

while.end:                                        ; preds = %while.body
  ret i32 0
}

; Same as above, but the loop is guarded by a pre-existing
; test.set.loop.iterations call; no further set/decrement calls are expected.
; CHECK-LABEL: pre_existing_test_set
; CHECK: call i1 @llvm.test.set.loop.iterations
; CHECK-NOT: llvm.set{{.*}}.loop.iterations
; CHECK: call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
; CHECK-NOT: call i32 @llvm.loop.decrement.reg
define i32 @pre_existing_test_set(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) {
entry:
  %guard = call i1 @llvm.test.set.loop.iterations.i32(i32 %n)
  br i1 %guard, label %while.preheader, label %while.end

while.preheader:
  br label %while.body

while.body:                                       ; preds = %while.body, %while.preheader
  %q.addr.05 = phi i32* [ %incdec.ptr, %while.body ], [ %q, %while.preheader ]
  %p.addr.04 = phi i32* [ %incdec.ptr1, %while.body ], [ %p, %while.preheader ]
  %0 = phi i32 [ %n, %while.preheader ], [ %2, %while.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %q.addr.05, i32 1
  %1 = load i32, i32* %q.addr.05, align 4
  %incdec.ptr1 = getelementptr inbounds i32, i32* %p.addr.04, i32 1
  store i32 %1, i32* %p.addr.04, align 4
  %2 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
  %3 = icmp ne i32 %2, 0
  br i1 %3, label %while.body, label %while.end

while.end:                                        ; preds = %while.body
  ret i32 0
}

; Loop nest whose inner loop already uses the hardware-loop intrinsics. The
; existing calls must be kept and no decrement may be added for the outer loop.
; CHECK-LABEL: pre_existing_inner
; CHECK-NOT: llvm.start.loop.iterations
; CHECK: while.cond1.preheader.us:
; CHECK: call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
; CHECK: br i1
; CHECK-NOT: call i32 @llvm.loop.decrement
define void @pre_existing_inner(i32* nocapture %A, i32 %N) {
entry:
  %cmp20 = icmp eq i32 %N, 0
  br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us

while.cond1.preheader.us:
  %i.021.us = phi i32 [ %inc6.us, %while.cond1.while.end_crit_edge.us ], [ 0, %entry ]
  %mul.us = mul i32 %i.021.us, %N
  %start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
  br label %while.body3.us

while.body3.us:
  %j.019.us = phi i32 [ 0, %while.cond1.preheader.us ], [ %inc.us, %while.body3.us ]
  %0 = phi i32 [ %start, %while.cond1.preheader.us ], [ %1, %while.body3.us ]
  %add.us = add i32 %j.019.us, %mul.us
  %arrayidx.us = getelementptr inbounds i32, i32* %A, i32 %add.us
  store i32 %add.us, i32* %arrayidx.us, align 4
  %inc.us = add nuw i32 %j.019.us, 1
  %1 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
  %2 = icmp ne i32 %1, 0
  br i1 %2, label %while.body3.us, label %while.cond1.while.end_crit_edge.us

while.cond1.while.end_crit_edge.us:
  %inc6.us = add nuw i32 %i.021.us, 1
  %exitcond23 = icmp eq i32 %inc6.us, %N
  br i1 %exitcond23, label %while.end7, label %while.cond1.preheader.us

while.end7:
  ret void
}

; Loop nest in un-rotated form: each loop tests its exit condition in the
; header (%4 and %9) and the latch is an unconditional branch. No loop
; intrinsics are expected.
; CHECK-LABEL: not_rotated
; CHECK-NOT: call i32 @llvm.start.loop.iterations
; CHECK-NOT: call i32 @llvm.loop.decrement.i32
define void @not_rotated(i32, i16* nocapture, i16 signext) {
  br label %4

4:
  %5 = phi i32 [ 0, %3 ], [ %19, %18 ]
  %6 = icmp eq i32 %5, %0
  br i1 %6, label %20, label %7

7:
  %8 = mul i32 %5, %0
  br label %9

9:
  %10 = phi i32 [ %17, %12 ], [ 0, %7 ]
  %11 = icmp eq i32 %10, %0
  br i1 %11, label %18, label %12

12:
  %13 = add i32 %10, %8
  %14 = getelementptr inbounds i16, i16* %1, i32 %13
  %15 = load i16, i16* %14, align 2
  %16 = add i16 %15, %2
  store i16 %16, i16* %14, align 2
  %17 = add i32 %10, 1
  br label %9

18:
  %19 = add i32 %5, 1
  br label %4

20:
  ret void
}

; Loop with two latches: both latch.0 and latch.1 branch back to header, and
; latch.1 is the only block that leaves the loop (to exit). No loop intrinsics
; are expected.
; CHECK-LABEL: multi_latch
; CHECK-NOT: call i32 @llvm.start.loop.iterations
; CHECK-NOT: call i32 @llvm.loop.decrement
define void @multi_latch(i32* %a, i32* %b, i32 %N) {
entry:
  %half = lshr i32 %N, 1
  br label %header

header:
  %iv = phi i32 [ 0, %entry ], [ %count.next, %latch.0 ], [ %count.next, %latch.1 ]
  %cmp = icmp ult i32 %iv, %half
  %addr.a = getelementptr i32, i32* %a, i32 %iv
  %addr.b = getelementptr i32, i32* %b, i32 %iv
  br i1 %cmp, label %if.then, label %if.else

if.then:
  store i32 %iv, i32* %addr.a
  br label %latch.0

if.else:
  store i32 %iv, i32* %addr.b
  br label %latch.0

latch.0:
  %count.next = add nuw i32 %iv, 1
  %cmp.1 = icmp ult i32 %count.next, %half
  br i1 %cmp.1, label %header, label %latch.1

latch.1:
  %ld = load i32, i32* %addr.a
  store i32 %ld, i32* %addr.b
  %cmp.2 = icmp ult i32 %count.next, %N
  ; The false edge must leave the loop; branching back to latch.1 would make
  ; the exit block unreachable and turn latch.1 into a self-loop.
  br i1 %cmp.2, label %header, label %exit

exit:
  ret void
}

; Single loop whose body contains a switch. The expectations require a
; test.set.loop.iterations guard in entry, a new preheader, and the decrement
; in the latch block for.inc.
; CHECK-LABEL: search
; CHECK: entry:
; CHECK: [[TEST:%[^ ]+]] = call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
; CHECK: br i1 [[TEST]], label %for.body.preheader, label %for.cond.cleanup
; CHECK: for.body.preheader:
; CHECK: br label %for.body
; CHECK: for.body:
; CHECK: for.inc:
; CHECK: [[LOOP_DEC:%[^ ]+]] = call i32 @llvm.loop.decrement.reg.i32(
; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK: br i1 [[CMP]], label %for.body, label %for.cond.cleanup
define i32 @search(i8* nocapture readonly %c, i32 %N) {
entry:
  %cmp11 = icmp eq i32 %N, 0
  br i1 %cmp11, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  %found.0.lcssa = phi i32 [ 0, %entry ], [ %found.1, %for.inc ]
  %spaces.0.lcssa = phi i32 [ 0, %entry ], [ %spaces.1, %for.inc ]
  %sub = sub nsw i32 %found.0.lcssa, %spaces.0.lcssa
  ret i32 %sub

for.body:
  %i.014 = phi i32 [ %inc3, %for.inc ], [ 0, %entry ]
  %spaces.013 = phi i32 [ %spaces.1, %for.inc ], [ 0, %entry ]
  %found.012 = phi i32 [ %found.1, %for.inc ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i8, i8* %c, i32 %i.014
  %0 = load i8, i8* %arrayidx, align 1
  switch i8 %0, label %for.inc [
    i8 108, label %sw.bb
    i8 111, label %sw.bb
    i8 112, label %sw.bb
    i8 32, label %sw.bb1
  ]

sw.bb:                                            ; preds = %for.body, %for.body, %for.body
  %inc = add nsw i32 %found.012, 1
  br label %for.inc

sw.bb1:                                           ; preds = %for.body
  %inc2 = add nsw i32 %spaces.013, 1
  br label %for.inc

for.inc:                                          ; preds = %sw.bb, %sw.bb1, %for.body
  %found.1 = phi i32 [ %found.012, %for.body ], [ %found.012, %sw.bb1 ], [ %inc, %sw.bb ]
  %spaces.1 = phi i32 [ %spaces.013, %for.body ], [ %inc2, %sw.bb1 ], [ %spaces.013, %sw.bb ]
  %inc3 = add nuw i32 %i.014, 1
  %exitcond = icmp eq i32 %inc3, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Up-counting loop (a[i] = b[i] * c[i]) guarded by a signed N > 0 test.
; CHECK-LABEL: unroll_inc_int
; CHECK: call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32(

; TODO: We should be able to support the unrolled loop body.
; CHECK-UNROLL-LABEL: unroll_inc_int
; CHECK-UNROLL: [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader
; CHECK-UNROLL-NOT: dls
; CHECK-UNROLL: [[LOOP:.LBB[0-9_]+]]: @ %for.body
; CHECK-UNROLL-NOT: le lr, [[LOOP]]
; CHECK-UNROLL: bne [[LOOP]]
; CHECK-UNROLL: wls lr, lr, [[EXIT:.LBB[0-9_]+]]
; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]:
; CHECK-UNROLL: le lr, [[EPIL]]
; CHECK-UNROLL-NEXT: [[EXIT]]

define void @unroll_inc_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
  %cmp8 = icmp sgt i32 %N, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  ret void

for.body:
  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
  %1 = load i32, i32* %arrayidx1, align 4
  %mul = mul nsw i32 %1, %0
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
  store i32 %mul, i32* %arrayidx2, align 4
  %inc = add nuw nsw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Same loop body as unroll_inc_int, but guarded by N == 0 so the pass is
; expected to fold the guard into test.set.loop.iterations (wls in llc output).
; CHECK-LABEL: unroll_inc_unsigned
; CHECK: call i1 @llvm.test.set.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32(

; CHECK-LLC-LABEL: unroll_inc_unsigned:
; CHECK-LLC: wls lr, r3, [[EXIT:.LBB[0-9_]+]]
; CHECK-LLC: [[HEADER:.LBB[0-9_]+]]:
; CHECK-LLC: le lr, [[HEADER]]
; CHECK-LLC-NEXT: [[EXIT]]:

; TODO: We should be able to support the unrolled loop body.
; CHECK-UNROLL-LABEL: unroll_inc_unsigned
; CHECK-UNROLL: [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader
; CHECK-UNROLL-NOT: dls
; CHECK-UNROLL: [[LOOP:.LBB[0-9_]+]]: @ %for.body
; CHECK-UNROLL-NOT: le lr, [[LOOP]]
; CHECK-UNROLL: bne [[LOOP]]
; CHECK-UNROLL: wls lr, lr, [[EPIL_EXIT:.LBB[0-9_]+]]
; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]:
; CHECK-UNROLL: le lr, [[EPIL]]
; CHECK-UNROLL: [[EPIL_EXIT]]:
; CHECK-UNROLL: pop
define void @unroll_inc_unsigned(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
  %cmp8 = icmp eq i32 %N, 0
  br i1 %cmp8, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void

for.body:
  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
  %1 = load i32, i32* %arrayidx1, align 4
  %mul = mul nsw i32 %1, %0
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
  store i32 %mul, i32* %arrayidx2, align 4
  %inc = add nuw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Down-counting variant: the induction variable starts at %N and the loop
; continues while the decremented value is (signed) greater than zero.
; CHECK-LABEL: unroll_dec_int
; CHECK: call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32(

; TODO: An unnecessary register is being held to hold COUNT, lr should just
; be used instead.
; CHECK-LLC-LABEL: unroll_dec_int:
; CHECK-LLC: dls lr, r3
; CHECK-LLC-NOT: mov lr, r3
; CHECK-LLC: [[HEADER:.LBB[0-9_]+]]:
; CHECK-LLC: le lr, [[HEADER]]

; CHECK-UNROLL-LABEL: unroll_dec_int:
; CHECK-UNROLL: wls lr, {{.*}}, [[PROLOGUE_EXIT:.LBB[0-9_]+]]
; CHECK-UNROLL-NEXT: [[PROLOGUE:.LBB[0-9_]+]]:
; CHECK-UNROLL: le lr, [[PROLOGUE]]
; CHECK-UNROLL-NEXT: [[PROLOGUE_EXIT:.LBB[0-9_]+]]:
; CHECK-UNROLL: dls lr, lr
; CHECK-UNROLL: [[BODY:.LBB[0-9_]+]]:
; CHECK-UNROLL: le lr, [[BODY]]
; CHECK-UNROLL-NOT: b
; CHECK-UNROLL: pop
define void @unroll_dec_int(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) {
entry:
  %cmp8 = icmp sgt i32 %N, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  ret void

for.body:
  %i.09 = phi i32 [ %dec, %for.body ], [ %N, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.09
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %c, i32 %i.09
  %1 = load i32, i32* %arrayidx1, align 4
  %mul = mul nsw i32 %1, %0
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 %i.09
  store i32 %mul, i32* %arrayidx2, align 4
  %dec = add nsw i32 %i.09, -1
  %cmp = icmp sgt i32 %dec, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Hardware-loop intrinsics referenced by the pre_existing* tests above.
declare i32 @llvm.start.loop.iterations.i32(i32) #0
declare i1 @llvm.test.set.loop.iterations.i32(i32) #0
declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #0
