; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-GLOBAL

; Not implemented as a MIR test so that changes to the generic HardwareLoops
; pass can also be tested. These functions have been taken from
; Transforms/HardwareLoops/loop-guards.ll, in which the generation of a few
; test.set intrinsics can be seen, but only one (ne_trip_count) gets generated
; here. Simplifications result in icmps changing and maybe also the CFG. So,
; TODO: Teach the HardwareLoops pass some better pattern recognition.

; None of the following names may appear anywhere in the output of the second
; run line (the one using the CHECK-GLOBAL prefix).
; CHECK-GLOBAL-NOT: DoLoopStart
; CHECK-GLOBAL-NOT: WhileLoopStart
; CHECK-GLOBAL-NOT: LoopEnd

; The 'N != 0' test is and-ed with an unrelated condition (%t1 & %t2) in the
; entry block, and the combined value guards the loop. The checks expect a
; compare and branch (t2CMPri/tBcc) followed by t2DLS and t2LEUpdate.
; CHECK: ne_and_guard
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK: t2CMPri renamable $lr, 0
; CHECK: tBcc %bb.4
; CHECK: bb.2.while.body.preheader:
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: bb.3.while.body:
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3
define void @ne_and_guard(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  %brmerge.demorgan = and i1 %t1, %t2
  %cmp6 = icmp ne i32 %N, 0
  %or.cond = and i1 %brmerge.demorgan, %cmp6
  br i1 %or.cond, label %while.body, label %if.end

while.body:                                       ; preds = %while.body, %entry
  %i.09 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
  %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %entry ]
  %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %entry ]
  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
  %tmp = load i32, i32* %b.addr.07, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
  store i32 %tmp, i32* %a.addr.08, align 4
  %inc = add nuw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %if.end, label %while.body

if.end:                                           ; preds = %while.body, %entry
  ret void
}

; The unrelated condition (%t1 & %t2) is tested in the entry block; the
; 'N != 0' test sits in while.preheader and guards the loop directly.
; TODO: This could generate WLS
; CHECK: ne_preheader
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK: t2CMPri renamable $lr, 0
; CHECK: tBcc %bb.4
; CHECK: bb.2.while.body.preheader:
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: bb.3.while.body:
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3
define void @ne_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  %brmerge.demorgan = and i1 %t1, %t2
  br i1 %brmerge.demorgan, label %while.preheader, label %if.end

while.preheader:                                  ; preds = %entry
  %cmp = icmp ne i32 %N, 0
  br i1 %cmp, label %while.body, label %if.end

while.body:                                       ; preds = %while.body, %while.preheader
  %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ]
  %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ]
  %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ]
  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
  %tmp = load i32, i32* %b.addr.07, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
  store i32 %tmp, i32* %a.addr.08, align 4
  %inc = add nuw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %if.end, label %while.body

if.end:                                           ; preds = %while.body, %while.preheader, %entry
  ret void
}

; Same shape as ne_preheader, except that the guard in while.preheader is
; written as 'N == 0' with the two branch targets swapped.
; TODO: This could generate WLS
; CHECK: eq_preheader
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK: t2CMPri renamable $lr, 0
; CHECK: tBcc %bb.4
; CHECK: bb.2.while.body.preheader:
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: bb.3.while.body:
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3
define void @eq_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  %brmerge.demorgan = and i1 %t1, %t2
  br i1 %brmerge.demorgan, label %while.preheader, label %if.end

while.preheader:                                  ; preds = %entry
  %cmp = icmp eq i32 %N, 0
  br i1 %cmp, label %if.end, label %while.body

while.body:                                       ; preds = %while.body, %while.preheader
  %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ]
  %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ]
  %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ]
  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
  %tmp = load i32, i32* %b.addr.07, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
  store i32 %tmp, i32* %a.addr.08, align 4
  %inc = add nuw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %if.end, label %while.body

if.end:                                           ; preds = %while.body, %while.preheader, %entry
  ret void
}

; The 'N != 0' test is in the entry block, one block before the loop's
; immediate predecessor; while.preheader tests the unrelated condition
; (%t1 & %t2) and is what branches into the loop.
; TODO: This could generate WLS
; CHECK: ne_prepreheader
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK: t2CMPri renamable $lr, 0
; CHECK: tBcc %bb.4
; CHECK: bb.2.while.body.preheader:
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: bb.3.while.body:
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3
define void @ne_prepreheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  %cmp = icmp ne i32 %N, 0
  br i1 %cmp, label %while.preheader, label %if.end

while.preheader:                                  ; preds = %entry
  %brmerge.demorgan = and i1 %t1, %t2
  br i1 %brmerge.demorgan, label %while.body, label %if.end

while.body:                                       ; preds = %while.body, %while.preheader
  %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ]
  %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ]
  %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ]
  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1
  %tmp = load i32, i32* %b.addr.07, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1
  store i32 %tmp, i32* %a.addr.08, align 4
  %inc = add nuw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %if.end, label %while.body

if.end:                                           ; preds = %while.body, %while.preheader, %entry
  ret void
}

; The guard does not test %N directly: it compares %be against zero, where %be
; is selected as 0 when 'N != 0' holds and as 'N - 1' otherwise. The loop
; itself is bottom-tested with an unsigned 'inc < N' compare.
; CHECK: be_ne
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK: $lr = t2DLS killed renamable $r12
; CHECK: bb.2.do.body:
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
define void @be_ne(i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  %cmp = icmp ne i32 %N, 0
  %sub = sub i32 %N, 1
  %be = select i1 %cmp, i32 0, i32 %sub
  %cmp.1 = icmp ne i32 %be, 0
  br i1 %cmp.1, label %do.body, label %if.end

do.body:                                          ; preds = %do.body, %entry
  %b.addr.0 = phi i32* [ %incdec.ptr, %do.body ], [ %b, %entry ]
  %a.addr.0 = phi i32* [ %incdec.ptr3, %do.body ], [ %a, %entry ]
  %i.0 = phi i32 [ %inc, %do.body ], [ 0, %entry ]
  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.0, i32 1
  %tmp = load i32, i32* %b.addr.0, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.0, i32 1
  store i32 %tmp, i32* %a.addr.0, align 4
  %inc = add nuw i32 %i.0, 1
  %cmp.2 = icmp ult i32 %inc, %N
  br i1 %cmp.2, label %do.body, label %if.end

if.end:                                           ; preds = %do.body, %entry
  ret void
}

; The 'N != 0' test is the only condition guarding the loop and lives in
; do.body.preheader, which the entry block reaches unconditionally. This is
; the one function here whose checks expect a t2WLS. %t1 is not used.
; TODO: Remove the tMOVr in the preheader!
; CHECK: ne_trip_count
; CHECK: body:
; CHECK: bb.0.entry:
; CHECK: $lr = t2WLS $r3, %bb.3
; CHECK: bb.1.do.body.preheader:
; CHECK: $lr = tMOVr
; CHECK: bb.2.do.body:
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
define void @ne_trip_count(i1 zeroext %t1, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  br label %do.body.preheader

do.body.preheader:                                ; preds = %entry
  %cmp = icmp ne i32 %N, 0
  br i1 %cmp, label %do.body, label %if.end

do.body:                                          ; preds = %do.body, %do.body.preheader
  %b.addr.0 = phi i32* [ %incdec.ptr, %do.body ], [ %b, %do.body.preheader ]
  %a.addr.0 = phi i32* [ %incdec.ptr3, %do.body ], [ %a, %do.body.preheader ]
  %i.0 = phi i32 [ %inc, %do.body ], [ 0, %do.body.preheader ]
  %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.0, i32 1
  %tmp = load i32, i32* %b.addr.0, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.0, i32 1
  store i32 %tmp, i32* %a.addr.0, align 4
  %inc = add nuw i32 %i.0, 1
  %cmp.1 = icmp ult i32 %inc, %N
  br i1 %cmp.1, label %do.body, label %if.end

if.end:                                           ; preds = %do.body, %do.body.preheader
  ret void
}