; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap %s

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; This test checks that the x86-cmov-converter optimization transforms CMOV
;; instructions into branches when it is profitable.
;; There are 5 cases below:
;; 1. CmovInHotPath:
;;    The CMOV depends on the condition and it is in the hot path.
;;    Thus, it is worth transforming.
;;
;; 2. CmovNotInHotPath:
;;    Similar to (1), except that the CMOV is not in the hot path.
;;    Thus, it is not worth transforming.
;;
;; 3. MaxIndex:
;;    Maximum-calculation algorithm that looks for the max index;
;;    calculating the CMOV value is cheaper than calculating the CMOV
;;    condition. Thus, it is worth transforming.
;;
;; 4. MaxValue:
;;    Maximum-calculation algorithm that looks for the max value;
;;    calculating the CMOV value is not cheaper than calculating the CMOV
;;    condition. Thus, it is not worth transforming.
;;
;; 5. BinarySearch:
;;    Usually, the binary-search condition is not predictable.
;;    Thus, it is not worth transforming.
;;
;; Test was created using the following command line:
;; > clang -S -O2 -m64 -fno-vectorize -fno-unroll-loops -emit-llvm foo.c -o -
;; Where foo.c is:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;void CmovInHotPath(int n, int a, int b, int *c, int *d) {
;;  for (int i = 0; i < n; i++) {
;;    int t = c[i] + 1;
;;    if (c[i] * a > b)
;;      t = 10;
;;    c[i] = (c[i] + 1) * t;
;;  }
;;}
;;
;;
;;void CmovNotInHotPath(int n, int a, int b, int *c, int *d) {
;;  for (int i = 0; i < n; i++) {
;;    int t = c[i];
;;    if (c[i] * a > b)
;;      t = 10;
;;    c[i] = t;
;;    d[i] /= b;
;;  }
;;}
;;
;;
;;int MaxIndex(int n, int *a) {
;;  int t = 0;
;;  for (int i = 1; i < n; i++) {
;;    if (a[i] > a[t])
;;      t = i;
;;  }
;;  return a[t];
;;}
;;
;;
;;int MaxValue(int n, int *a) {
;;  int t = a[0];
;;  for (int i = 1; i < n; i++) {
;;    if (a[i] > t)
;;      t = a[i];
;;  }
;;  return t;
;;}
;;
;;typedef struct Node Node;
;;struct Node {
;;  unsigned Val;
;;  Node *Right;
;;  Node *Left;
;;};
;;
;;unsigned BinarySearch(unsigned Mask, Node *Curr, Node *Next) {
;;  while (Curr->Val > Next->Val) {
;;    Curr = Next;
;;    if (Mask & (0x1 << Curr->Val))
;;      Next = Curr->Right;
;;    else
;;      Next = Curr->Left;
;;  }
;;  return Curr->Val;
;;}
;;
;;
;;void SmallGainPerLoop(int n, int a, int b, int *c, int *d) {
;;  for (int i = 0; i < n; i++) {
;;    int t = c[i];
;;    if (c[i] * a > b)
;;      t = 10;
;;    c[i] = t;
;;  }
;;}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; NOTE(review): SmallGainPerLoop appears in the C listing above but has no IR
; counterpart in this file -- confirm that is intentional.

%struct.Node = type { i32, %struct.Node*, %struct.Node* }

; The select result feeds the multiply on the loop's critical path every
; iteration, so the converter is expected to replace the CMOV with a branch:
; no cmov in the output, a jg instead.
; CHECK-LABEL: CmovInHotPath
; CHECK-NOT: cmov
; CHECK: jg

define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture readnone %d) #0 {
entry:
  %cmp14 = icmp sgt i32 %n, 0
  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %0, 1
  %mul = mul nsw i32 %0, %a
  %cmp3 = icmp sgt i32 %mul, %b
  ; This select becomes the CMOV under test; its result is consumed by %mul7,
  ; which is stored back, keeping the CMOV on the hot path.
  %. = select i1 %cmp3, i32 10, i32 %add
  %mul7 = mul nsw i32 %., %add
  store i32 %mul7, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; This loop also contains an sdiv, so the CMOV is not on the critical path
; and should remain a cmovg.
; CHECK-LABEL: CmovNotInHotPath
; CHECK: cmovg

define void @CmovNotInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture %d) #0 {
entry:
  %cmp18 = icmp sgt i32 %n, 0
  br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %mul = mul nsw i32 %0, %a
  %cmp3 = icmp sgt i32 %mul, %b
  %. = select i1 %cmp3, i32 10, i32 %0
  store i32 %., i32* %arrayidx, align 4
  ; The d[i] /= b division dominates the loop cost (see C source above).
  %arrayidx7 = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx7, align 4
  %div = sdiv i32 %1, %b
  store i32 %div, i32* %arrayidx7, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; The CMOV's value operands (loop index vs. current best index) are cheap to
; compute, while the condition needs two loads and a compare, so converting
; to a branch is profitable: no cmov, a jg instead.
; CHECK-LABEL: MaxIndex
; CHECK-NOT: cmov
; CHECK: jg

define i32 @MaxIndex(i32 %n, i32* nocapture readonly %a) #0 {
entry:
  %cmp14 = icmp sgt i32 %n, 1
  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup.loopexit:                        ; preds = %for.body
  %phitmp = sext i32 %i.0.t.0 to i64
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  %t.0.lcssa = phi i64 [ 0, %entry ], [ %phitmp, %for.cond.cleanup.loopexit ]
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %t.0.lcssa
  %0 = load i32, i32* %arrayidx5, align 4
  ret i32 %0

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
  %t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx, align 4
  %idxprom1 = sext i32 %t.015 to i64
  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %idxprom1
  %2 = load i32, i32* %arrayidx2, align 4
  ; Condition: a[i] > a[t] (two loads + compare); value: just the indices.
  %cmp3 = icmp sgt i32 %1, %2
  %3 = trunc i64 %indvars.iv to i32
  %i.0.t.0 = select i1 %cmp3, i32 %3, i32 %t.015
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

; Here the CMOV's value is the freshly loaded element that the condition also
; depends on, so computing the value is not cheaper than the condition; the
; CMOV should be kept (cmovg, no jg).
; CHECK-LABEL: MaxValue
; CHECK-NOT: jg
; CHECK: cmovg

define i32 @MaxValue(i32 %n, i32* nocapture readonly %a) #0 {
entry:
  %0 = load i32, i32* %a, align 4
  %cmp13 = icmp sgt i32 %n, 1
  br i1 %cmp13, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  %t.0.lcssa = phi i32 [ %0, %entry ], [ %.t.0, %for.body ]
  ret i32 %t.0.lcssa

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
  %t.014 = phi i32 [ %.t.0, %for.body ], [ %0, %for.body.preheader ]
  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx1, align 4
  %cmp2 = icmp sgt i32 %1, %t.014
  %.t.0 = select i1 %cmp2, i32 %1, i32 %t.014
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; A binary-search style loop: the select's condition is data-dependent and
; unpredictable, so the CMOV is kept; the test only requires a setcc-based
; sequence in the output.
; CHECK-LABEL: BinarySearch
; CHECK: set

define i32 @BinarySearch(i32 %Mask, %struct.Node* nocapture readonly %Curr, %struct.Node* nocapture readonly %Next) #0 {
entry:
  %Val8 = getelementptr inbounds %struct.Node, %struct.Node* %Curr, i64 0, i32 0
  %0 = load i32, i32* %Val8, align 8
  %Val19 = getelementptr inbounds %struct.Node, %struct.Node* %Next, i64 0, i32 0
  %1 = load i32, i32* %Val19, align 8
  %cmp10 = icmp ugt i32 %0, %1
  br i1 %cmp10, label %while.body, label %while.end

while.body:                                       ; preds = %entry, %while.body
  %2 = phi i32 [ %4, %while.body ], [ %1, %entry ]
  %Next.addr.011 = phi %struct.Node* [ %3, %while.body ], [ %Next, %entry ]
  %shl = shl i32 1, %2
  %and = and i32 %shl, %Mask
  %tobool = icmp eq i32 %and, 0
  ; The select picks which child pointer (Left/Right) to follow next.
  %Left = getelementptr inbounds %struct.Node, %struct.Node* %Next.addr.011, i64 0, i32 2
  %Right = getelementptr inbounds %struct.Node, %struct.Node* %Next.addr.011, i64 0, i32 1
  %Left.sink = select i1 %tobool, %struct.Node** %Left, %struct.Node** %Right
  %3 = load %struct.Node*, %struct.Node** %Left.sink, align 8
  %Val1 = getelementptr inbounds %struct.Node, %struct.Node* %3, i64 0, i32 0
  %4 = load i32, i32* %Val1, align 8
  %cmp = icmp ugt i32 %2, %4
  br i1 %cmp, label %while.body, label %while.end

while.end:                                        ; preds = %while.body, %entry
  %.lcssa = phi i32 [ %0, %entry ], [ %2, %while.body ]
  ret i32 %.lcssa
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The following test checks that x86-cmov-converter optimization transforms
;; CMOV instructions into branch correctly.
;;
;; MBB:
;;   cond = cmp ...
;;   v1 = CMOVgt t1, f1, cond
;;   v2 = CMOVle s1, f2, cond
;;
;; Where: t1 = 11, f1 = 22, f2 = a
;;
;; After CMOV transformation
;; -------------------------
;; MBB:
;;   cond = cmp ...
;;   ja %SinkMBB
;;
;; FalseMBB:
;;   jmp %SinkMBB
;;
;; SinkMBB:
;;   %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
;;   %v2 = phi[%f1, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
;;                                          ; true-value with false-value
;;                                          ; Phi instruction cannot use
;;                                          ; previous Phi instruction result
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; CHECK-LABEL: Transform
; CHECK-NOT: cmov
; CHECK: divl [[a:%[0-9a-z]*]]
; CHECK: movl $11, [[s1:%[0-9a-z]*]]
; CHECK: movl [[a]], [[s2:%[0-9a-z]*]]
; CHECK: cmpl [[a]], %edx
; CHECK: ja [[SinkBB:.*]]
; CHECK: [[FalseBB:.*]]:
; CHECK: movl $22, [[s1]]
; CHECK: movl $22, [[s2]]
; CHECK: [[SinkBB]]:
; CHECK: ja

define void @Transform(i32 *%arr, i32 *%arr2, i32 %a, i32 %b, i32 %c, i32 %n) #0 {
entry:
  ; NOTE(review): this guard is constant-false (0 ugt %n never holds), so the
  ; loop is statically unreachable from entry; the test only cares about the
  ; code generated for the loop body -- confirm this is intentional.
  %cmp10 = icmp ugt i32 0, %n
  br i1 %cmp10, label %while.body, label %while.end

while.body:                                       ; preds = %entry, %while.body
  %i = phi i32 [ %i_inc, %while.body ], [ 0, %entry ]
  %arr_i = getelementptr inbounds i32, i32* %arr, i32 %i
  %x = load i32, i32* %arr_i, align 4
  %div = udiv i32 %x, %a
  ; Two selects on opposite conditions (%cond / %condOpp) form the CMOV group
  ; whose phi-building with opposite condition codes is exercised here.
  %cond = icmp ugt i32 %div, %a
  %condOpp = icmp ule i32 %div, %a
  %s1 = select i1 %cond, i32 11, i32 22
  %s2 = select i1 %condOpp, i32 %s1, i32 %a
  %sum = urem i32 %s1, %s2
  store i32 %sum, i32* %arr_i, align 4
  %i_inc = add i32 %i, 1
  %cmp = icmp ugt i32 %i_inc, %n
  br i1 %cmp, label %while.body, label %while.end

while.end:                                        ; preds = %while.body, %entry
  ret void
}

; Test that we always will convert a cmov with a memory operand into a branch,
; even outside of a loop.
define i32 @test_cmov_memoperand(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
; CHECK-LABEL: test_cmov_memoperand:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: cmpl
  ; The false operand of the select is a load, i.e. a CMOV with a memory
  ; operand; such CMOVs are always converted to branches, even with no loop.
  %load = load i32, i32* %y
  %z = select i1 %cond, i32 %x, i32 %load
; CHECK-NOT: cmov
; CHECK: ja [[FALSE_BB:.*]]
; CHECK: movl (%r{{..}}), %[[R:.*]]
; CHECK: [[FALSE_BB]]:
; CHECK: movl %[[R]], %
  ret i32 %z
}

; Test that we can convert a group of cmovs where only one has a memory
; operand.
define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: cmpl
  %y = load i32, i32* %y.ptr
  ; Three selects share one condition; only %z2 uses a loaded value, yet the
  ; whole group should be converted together.
  %z1 = select i1 %cond, i32 %x, i32 %a
  %z2 = select i1 %cond, i32 %x, i32 %y
  %z3 = select i1 %cond, i32 %x, i32 %b
; CHECK-NOT: cmov
; CHECK: ja [[FALSE_BB:.*]]
; CHECK-DAG: movl %{{.*}}, %[[R1:.*]]
; CHECK-DAG: movl (%r{{..}}), %[[R2:.*]]
; CHECK-DAG: movl %{{.*}} %[[R3:.*]]
; CHECK: [[FALSE_BB]]:
; CHECK: addl
; CHECK-DAG: %[[R1]]
; CHECK-DAG: ,
; CHECK-DAG: %[[R3]]
; CHECK-DAG: addl
; CHECK-DAG: %[[R2]]
; CHECK-DAG: ,
; CHECK-DAG: %[[R3]]
; CHECK: movl %[[R3]], %eax
; CHECK: retq
  %s1 = add i32 %z1, %z2
  %s2 = add i32 %s1, %z3
  ret i32 %s2
}

; Same as before but with operands reversed in the select with a load.
define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group2:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: cmpl
  %y = load i32, i32* %y.ptr
  ; The loaded value is now the TRUE operand, so the branch is taken on the
  ; inverted condition (jbe instead of ja).
  %z2 = select i1 %cond, i32 %a, i32 %x
  %z1 = select i1 %cond, i32 %y, i32 %x
  %z3 = select i1 %cond, i32 %b, i32 %x
; CHECK-NOT: cmov
; CHECK: jbe [[FALSE_BB:.*]]
; CHECK-DAG: movl %{{.*}}, %[[R1:.*]]
; CHECK-DAG: movl (%r{{..}}), %[[R2:.*]]
; CHECK-DAG: movl %{{.*}} %[[R3:.*]]
; CHECK: [[FALSE_BB]]:
; CHECK: addl
; CHECK-DAG: %[[R1]]
; CHECK-DAG: ,
; CHECK-DAG: %[[R3]]
; CHECK-DAG: addl
; CHECK-DAG: %[[R2]]
; CHECK-DAG: ,
; CHECK-DAG: %[[R3]]
; CHECK: movl %[[R3]], %eax
; CHECK: retq
  %s1 = add i32 %z1, %z2
  %s2 = add i32 %s1, %z3
  ret i32 %s2
}

; Test that we don't convert a group of cmovs with conflicting directions of
; loads.
define i32 @test_cmov_memoperand_conflicting_dir(i32 %a, i32 %b, i32 %x, i32* %y1.ptr, i32* %y2.ptr) #0 {
; CHECK-LABEL: test_cmov_memoperand_conflicting_dir:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: cmpl
  %y1 = load i32, i32* %y1.ptr
  %y2 = load i32, i32* %y2.ptr
  ; %z1's load is on the false side, %z2's on the true side: the conflicting
  ; directions prevent conversion, so both CMOVs must survive.
  %z1 = select i1 %cond, i32 %x, i32 %y1
  %z2 = select i1 %cond, i32 %y2, i32 %x
; CHECK: cmoval
; CHECK: cmoval
  %s1 = add i32 %z1, %z2
  ret i32 %s1
}

; Test that we can convert a group of cmovs where only one has a memory
; operand and where that memory operand's registers come from a prior cmov in
; the group.
define i32 @test_cmov_memoperand_in_group_reuse_for_addr(i32 %a, i32 %b, i32* %x, i32* %y) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: cmpl
  ; The address of the load (%p) is itself produced by a CMOV in the group.
  %p = select i1 %cond, i32* %x, i32* %y
  %load = load i32, i32* %p
  %z = select i1 %cond, i32 %a, i32 %load
; CHECK-NOT: cmov
; CHECK: ja [[FALSE_BB:.*]]
; CHECK: movl (%r{{..}}), %[[R:.*]]
; CHECK: [[FALSE_BB]]:
; CHECK: movl %[[R]], %eax
; CHECK: retq
  ret i32 %z
}

; Test that we can convert a group of two cmovs with memory operands where one
; uses the result of the other as part of the address.
define i32 @test_cmov_memoperand_in_group_reuse_for_addr2(i32 %a, i32 %b, i32* %x, i32** %y) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: cmpl
  ; %p selects between %x and a loaded pointer, and %z then loads through %p:
  ; two memory-operand CMOVs chained through an address.
  %load1 = load i32*, i32** %y
  %p = select i1 %cond, i32* %x, i32* %load1
  %load2 = load i32, i32* %p
  %z = select i1 %cond, i32 %a, i32 %load2
; CHECK-NOT: cmov
; CHECK: ja [[FALSE_BB:.*]]
; CHECK: movq (%r{{..}}), %[[R1:.*]]
; CHECK: movl (%[[R1]]), %[[R2:.*]]
; CHECK: [[FALSE_BB]]:
; CHECK: movl %[[R2]], %eax
; CHECK: retq
  ret i32 %z
}

; Test that we can convert a group of cmovs where only one has a memory
; operand and where that memory operand's registers come from a prior cmov and
; where that cmov gets *its* input from a prior cmov in the group.
define i32 @test_cmov_memoperand_in_group_reuse_for_addr3(i32 %a, i32 %b, i32* %x, i32* %y, i32* %z) #0 {
; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
entry:
  %cond = icmp ugt i32 %a, %b
; CHECK: cmpl
  ; %p2's false operand chains through %p, which is itself a CMOV result, so
  ; the load's address depends on two prior CMOVs in the same group.
  %p = select i1 %cond, i32* %x, i32* %y
  %p2 = select i1 %cond, i32* %z, i32* %p
  %load = load i32, i32* %p2
  %r = select i1 %cond, i32 %a, i32 %load
; CHECK-NOT: cmov
; CHECK: ja [[FALSE_BB:.*]]
; CHECK: movl (%r{{..}}), %[[R:.*]]
; CHECK: [[FALSE_BB]]:
; CHECK: movl %[[R]], %eax
; CHECK: retq
  ret i32 %r
}

attributes #0 = {"target-cpu"="x86-64"}