• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap %s
2
3;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4;; This test checks that x86-cmov-converter optimization transform CMOV
5;; instruction into branches when it is profitable.
6;; There are 5 cases below:
7;;   1. CmovInCriticalPath:
8;;        CMOV depends on the condition and it is in the hot path.
9;;        Thus, it worths transforming.
10;;
11;;   2. CmovNotInCriticalPath:
12;;        Similar test like in (1), just that CMOV is not in the hot path.
13;;        Thus, it does not worth transforming.
14;;
15;;   3. MaxIndex:
16;;        Maximum calculation algorithm that is looking for the max index,
17;;        calculating CMOV value is cheaper than calculating CMOV condition.
18;;        Thus, it worths transforming.
19;;
20;;   4. MaxValue:
21;;        Maximum calculation algorithm that is looking for the max value,
22;;        calculating CMOV value is not cheaper than calculating CMOV condition.
23;;        Thus, it does not worth transforming.
24;;
25;;   5. BinarySearch:
26;;        Usually, binary search CMOV is not predicted.
27;;        Thus, it does not worth transforming.
28;;
29;; Test was created using the following command line:
30;; > clang -S -O2 -m64 -fno-vectorize -fno-unroll-loops -emit-llvm foo.c -o -
31;; Where foo.c is:
32;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
33;;void CmovInHotPath(int n, int a, int b, int *c, int *d) {
34;;  for (int i = 0; i < n; i++) {
35;;    int t = c[i] + 1;
36;;    if (c[i] * a > b)
37;;      t = 10;
38;;    c[i] = (c[i] + 1) * t;
39;;  }
40;;}
41;;
42;;
43;;void CmovNotInHotPath(int n, int a, int b, int *c, int *d) {
44;;  for (int i = 0; i < n; i++) {
45;;    int t = c[i];
46;;    if (c[i] * a > b)
47;;      t = 10;
48;;    c[i] = t;
49;;    d[i] /= b;
50;;  }
51;;}
52;;
53;;
54;;int MaxIndex(int n, int *a) {
55;;  int t = 0;
56;;  for (int i = 1; i < n; i++) {
57;;    if (a[i] > a[t])
58;;      t = i;
59;;  }
60;;  return a[t];
61;;}
62;;
63;;
64;;int MaxValue(int n, int *a) {
65;;  int t = a[0];
66;;  for (int i = 1; i < n; i++) {
67;;    if (a[i] > t)
68;;      t = a[i];
69;;  }
70;;  return t;
71;;}
72;;
73;;typedef struct Node Node;
74;;struct Node {
75;;  unsigned Val;
76;;  Node *Right;
77;;  Node *Left;
78;;};
79;;
80;;unsigned BinarySearch(unsigned Mask, Node *Curr, Node *Next) {
81;;  while (Curr->Val > Next->Val) {
82;;    Curr = Next;
83;;    if (Mask & (0x1 << Curr->Val))
84;;      Next = Curr->Right;
85;;    else
86;;      Next = Curr->Left;
87;;  }
88;;  return Curr->Val;
89;;}
90;;
91;;
92;;void SmallGainPerLoop(int n, int a, int b, int *c, int *d) {
93;;  for (int i = 0; i < n; i++) {
94;;    int t = c[i];
95;;    if (c[i] * a > b)
96;;      t = 10;
97;;    c[i] = t;
98;;  }
99;;}
100;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
101
102%struct.Node = type { i32, %struct.Node*, %struct.Node* }
103
104; CHECK-LABEL: CmovInHotPath
105; CHECK-NOT: cmov
106; CHECK: jg
107
108define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture readnone %d) #0 {
109entry:
110  %cmp14 = icmp sgt i32 %n, 0
111  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
112
113for.body.preheader:                               ; preds = %entry
114  %wide.trip.count = zext i32 %n to i64
115  br label %for.body
116
117for.cond.cleanup:                                 ; preds = %for.body, %entry
118  ret void
119
120for.body:                                         ; preds = %for.body.preheader, %for.body
121  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
122  %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
123  %0 = load i32, i32* %arrayidx, align 4
124  %add = add nsw i32 %0, 1
125  %mul = mul nsw i32 %0, %a
126  %cmp3 = icmp sgt i32 %mul, %b
127  %. = select i1 %cmp3, i32 10, i32 %add
128  %mul7 = mul nsw i32 %., %add
129  store i32 %mul7, i32* %arrayidx, align 4
130  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
131  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
132  br i1 %exitcond, label %for.cond.cleanup, label %for.body
133}
134
135; CHECK-LABEL: CmovNotInHotPath
136; CHECK: cmovg
137
138define void @CmovNotInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture %d) #0 {
139entry:
140  %cmp18 = icmp sgt i32 %n, 0
141  br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup
142
143for.body.preheader:                               ; preds = %entry
144  %wide.trip.count = zext i32 %n to i64
145  br label %for.body
146
147for.cond.cleanup:                                 ; preds = %for.body, %entry
148  ret void
149
150for.body:                                         ; preds = %for.body.preheader, %for.body
151  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
152  %arrayidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
153  %0 = load i32, i32* %arrayidx, align 4
154  %mul = mul nsw i32 %0, %a
155  %cmp3 = icmp sgt i32 %mul, %b
156  %. = select i1 %cmp3, i32 10, i32 %0
157  store i32 %., i32* %arrayidx, align 4
158  %arrayidx7 = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
159  %1 = load i32, i32* %arrayidx7, align 4
160  %div = sdiv i32 %1, %b
161  store i32 %div, i32* %arrayidx7, align 4
162  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
163  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
164  br i1 %exitcond, label %for.cond.cleanup, label %for.body
165}
166
167; CHECK-LABEL: MaxIndex
168; CHECK-NOT: cmov
169; CHECK: jg
170
171define i32 @MaxIndex(i32 %n, i32* nocapture readonly %a) #0 {
172entry:
173  %cmp14 = icmp sgt i32 %n, 1
174  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
175
176for.body.preheader:                               ; preds = %entry
177  %wide.trip.count = zext i32 %n to i64
178  br label %for.body
179
180for.cond.cleanup.loopexit:                        ; preds = %for.body
181  %phitmp = sext i32 %i.0.t.0 to i64
182  br label %for.cond.cleanup
183
184for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
185  %t.0.lcssa = phi i64 [ 0, %entry ], [ %phitmp, %for.cond.cleanup.loopexit ]
186  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %t.0.lcssa
187  %0 = load i32, i32* %arrayidx5, align 4
188  ret i32 %0
189
190for.body:                                         ; preds = %for.body.preheader, %for.body
191  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
192  %t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ]
193  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
194  %1 = load i32, i32* %arrayidx, align 4
195  %idxprom1 = sext i32 %t.015 to i64
196  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %idxprom1
197  %2 = load i32, i32* %arrayidx2, align 4
198  %cmp3 = icmp sgt i32 %1, %2
199  %3 = trunc i64 %indvars.iv to i32
200  %i.0.t.0 = select i1 %cmp3, i32 %3, i32 %t.015
201  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
202  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
203  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
204}
205
206; CHECK-LABEL: MaxValue
207; CHECK-NOT: jg
208; CHECK: cmovg
209
210define i32 @MaxValue(i32 %n, i32* nocapture readonly %a) #0 {
211entry:
212  %0 = load i32, i32* %a, align 4
213  %cmp13 = icmp sgt i32 %n, 1
214  br i1 %cmp13, label %for.body.preheader, label %for.cond.cleanup
215
216for.body.preheader:                               ; preds = %entry
217  %wide.trip.count = zext i32 %n to i64
218  br label %for.body
219
220for.cond.cleanup:                                 ; preds = %for.body, %entry
221  %t.0.lcssa = phi i32 [ %0, %entry ], [ %.t.0, %for.body ]
222  ret i32 %t.0.lcssa
223
224for.body:                                         ; preds = %for.body.preheader, %for.body
225  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
226  %t.014 = phi i32 [ %.t.0, %for.body ], [ %0, %for.body.preheader ]
227  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
228  %1 = load i32, i32* %arrayidx1, align 4
229  %cmp2 = icmp sgt i32 %1, %t.014
230  %.t.0 = select i1 %cmp2, i32 %1, i32 %t.014
231  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
232  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
233  br i1 %exitcond, label %for.cond.cleanup, label %for.body
234}
235
236; CHECK-LABEL: BinarySearch
237; CHECK: set
238
239define i32 @BinarySearch(i32 %Mask, %struct.Node* nocapture readonly %Curr, %struct.Node* nocapture readonly %Next) #0 {
240entry:
241  %Val8 = getelementptr inbounds %struct.Node, %struct.Node* %Curr, i64 0, i32 0
242  %0 = load i32, i32* %Val8, align 8
243  %Val19 = getelementptr inbounds %struct.Node, %struct.Node* %Next, i64 0, i32 0
244  %1 = load i32, i32* %Val19, align 8
245  %cmp10 = icmp ugt i32 %0, %1
246  br i1 %cmp10, label %while.body, label %while.end
247
248while.body:                                       ; preds = %entry, %while.body
249  %2 = phi i32 [ %4, %while.body ], [ %1, %entry ]
250  %Next.addr.011 = phi %struct.Node* [ %3, %while.body ], [ %Next, %entry ]
251  %shl = shl i32 1, %2
252  %and = and i32 %shl, %Mask
253  %tobool = icmp eq i32 %and, 0
254  %Left = getelementptr inbounds %struct.Node, %struct.Node* %Next.addr.011, i64 0, i32 2
255  %Right = getelementptr inbounds %struct.Node, %struct.Node* %Next.addr.011, i64 0, i32 1
256  %Left.sink = select i1 %tobool, %struct.Node** %Left, %struct.Node** %Right
257  %3 = load %struct.Node*, %struct.Node** %Left.sink, align 8
258  %Val1 = getelementptr inbounds %struct.Node, %struct.Node* %3, i64 0, i32 0
259  %4 = load i32, i32* %Val1, align 8
260  %cmp = icmp ugt i32 %2, %4
261  br i1 %cmp, label %while.body, label %while.end
262
263while.end:                                        ; preds = %while.body, %entry
264  %.lcssa = phi i32 [ %0, %entry ], [ %2, %while.body ]
265  ret i32 %.lcssa
266}
267
268;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
269;; The following test checks that x86-cmov-converter optimization transforms
270;; CMOV instructions into branch correctly.
271;;
272;; MBB:
273;;   cond = cmp ...
274;;   v1 = CMOVgt t1, f1, cond
275;;   v2 = CMOVle s1, f2, cond
276;;
277;; Where: t1 = 11, f1 = 22, f2 = a
278;;
279;; After CMOV transformation
280;; -------------------------
281;; MBB:
282;;   cond = cmp ...
283;;   ja %SinkMBB
284;;
285;; FalseMBB:
286;;   jmp %SinkMBB
287;;
288;; SinkMBB:
289;;   %v1 = phi[%f1, %FalseMBB], [%t1, %MBB]
290;;   %v2 = phi[%f1, %FalseMBB], [%f2, %MBB] ; For CMOV with OppCC switch
291;;                                          ; true-value with false-value
292;;                                          ; Phi instruction cannot use
293;;                                          ; previous Phi instruction result
294;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
295
296; CHECK-LABEL: Transform
297; CHECK-NOT: cmov
298; CHECK:         divl    [[a:%[0-9a-z]*]]
299; CHECK:         movl    $11, [[s1:%[0-9a-z]*]]
300; CHECK:         movl    [[a]], [[s2:%[0-9a-z]*]]
301; CHECK:         cmpl    [[a]], %edx
302; CHECK:         ja      [[SinkBB:.*]]
303; CHECK: [[FalseBB:.*]]:
304; CHECK:         movl    $22, [[s1]]
305; CHECK:         movl    $22, [[s2]]
306; CHECK: [[SinkBB]]:
307; CHECK:         ja
308
309define void @Transform(i32 *%arr, i32 *%arr2, i32 %a, i32 %b, i32 %c, i32 %n) #0 {
310entry:
311  %cmp10 = icmp ugt i32 0, %n
312  br i1 %cmp10, label %while.body, label %while.end
313
314while.body:                                       ; preds = %entry, %while.body
315  %i = phi i32 [ %i_inc, %while.body ], [ 0, %entry ]
316  %arr_i = getelementptr inbounds i32, i32* %arr, i32 %i
317  %x = load i32, i32* %arr_i, align 4
318  %div = udiv i32 %x, %a
319  %cond = icmp ugt i32 %div, %a
320  %condOpp = icmp ule i32 %div, %a
321  %s1 = select i1 %cond, i32 11, i32 22
322  %s2 = select i1 %condOpp, i32 %s1, i32 %a
323  %sum = urem i32 %s1, %s2
324  store i32 %sum, i32* %arr_i, align 4
325  %i_inc = add i32 %i, 1
326  %cmp = icmp ugt i32 %i_inc, %n
327  br i1 %cmp, label %while.body, label %while.end
328
329while.end:                                        ; preds = %while.body, %entry
330  ret void
331}
332
333; Test that we always will convert a cmov with a memory operand into a branch,
334; even outside of a loop.
335define i32 @test_cmov_memoperand(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
336; CHECK-LABEL: test_cmov_memoperand:
337entry:
338  %cond = icmp ugt i32 %a, %b
339; CHECK:         cmpl
340  %load = load i32, i32* %y
341  %z = select i1 %cond, i32 %x, i32 %load
342; CHECK-NOT:     cmov
343; CHECK:         ja [[FALSE_BB:.*]]
344; CHECK:         movl (%r{{..}}), %[[R:.*]]
345; CHECK:       [[FALSE_BB]]:
346; CHECK:         movl %[[R]], %
347  ret i32 %z
348}
349
350; Test that we can convert a group of cmovs where only one has a memory
351; operand.
352define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
353; CHECK-LABEL: test_cmov_memoperand_in_group:
354entry:
355  %cond = icmp ugt i32 %a, %b
356; CHECK:         cmpl
357  %y = load i32, i32* %y.ptr
358  %z1 = select i1 %cond, i32 %x, i32 %a
359  %z2 = select i1 %cond, i32 %x, i32 %y
360  %z3 = select i1 %cond, i32 %x, i32 %b
361; CHECK-NOT:     cmov
362; CHECK:         ja [[FALSE_BB:.*]]
363; CHECK-DAG:     movl %{{.*}}, %[[R1:.*]]
364; CHECK-DAG:     movl (%r{{..}}), %[[R2:.*]]
365; CHECK-DAG:     movl %{{.*}} %[[R3:.*]]
366; CHECK:       [[FALSE_BB]]:
367; CHECK:         addl
368; CHECK-DAG:       %[[R1]]
369; CHECK-DAG:       ,
370; CHECK-DAG:       %[[R3]]
371; CHECK-DAG:     addl
372; CHECK-DAG:       %[[R2]]
373; CHECK-DAG:       ,
374; CHECK-DAG:       %[[R3]]
375; CHECK:         movl %[[R3]], %eax
376; CHECK:         retq
377  %s1 = add i32 %z1, %z2
378  %s2 = add i32 %s1, %z3
379  ret i32 %s2
380}
381
382; Same as before but with operands reversed in the select with a load.
383define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
384; CHECK-LABEL: test_cmov_memoperand_in_group2:
385entry:
386  %cond = icmp ugt i32 %a, %b
387; CHECK:         cmpl
388  %y = load i32, i32* %y.ptr
389  %z2 = select i1 %cond, i32 %a, i32 %x
390  %z1 = select i1 %cond, i32 %y, i32 %x
391  %z3 = select i1 %cond, i32 %b, i32 %x
392; CHECK-NOT:     cmov
393; CHECK:         jbe [[FALSE_BB:.*]]
394; CHECK-DAG:     movl %{{.*}}, %[[R1:.*]]
395; CHECK-DAG:     movl (%r{{..}}), %[[R2:.*]]
396; CHECK-DAG:     movl %{{.*}} %[[R3:.*]]
397; CHECK:       [[FALSE_BB]]:
398; CHECK:         addl
399; CHECK-DAG:       %[[R1]]
400; CHECK-DAG:       ,
401; CHECK-DAG:       %[[R3]]
402; CHECK-DAG:     addl
403; CHECK-DAG:       %[[R2]]
404; CHECK-DAG:       ,
405; CHECK-DAG:       %[[R3]]
406; CHECK:         movl %[[R3]], %eax
407; CHECK:         retq
408  %s1 = add i32 %z1, %z2
409  %s2 = add i32 %s1, %z3
410  ret i32 %s2
411}
412
413; Test that we don't convert a group of cmovs with conflicting directions of
414; loads.
415define i32 @test_cmov_memoperand_conflicting_dir(i32 %a, i32 %b, i32 %x, i32* %y1.ptr, i32* %y2.ptr) #0 {
416; CHECK-LABEL: test_cmov_memoperand_conflicting_dir:
417entry:
418  %cond = icmp ugt i32 %a, %b
419; CHECK:         cmpl
420  %y1 = load i32, i32* %y1.ptr
421  %y2 = load i32, i32* %y2.ptr
422  %z1 = select i1 %cond, i32 %x, i32 %y1
423  %z2 = select i1 %cond, i32 %y2, i32 %x
424; CHECK:         cmoval
425; CHECK:         cmoval
426  %s1 = add i32 %z1, %z2
427  ret i32 %s1
428}
429
430; Test that we can convert a group of cmovs where only one has a memory
431; operand and where that memory operand's registers come from a prior cmov in
432; the group.
433define i32 @test_cmov_memoperand_in_group_reuse_for_addr(i32 %a, i32 %b, i32* %x, i32* %y) #0 {
434; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
435entry:
436  %cond = icmp ugt i32 %a, %b
437; CHECK:         cmpl
438  %p = select i1 %cond, i32* %x, i32* %y
439  %load = load i32, i32* %p
440  %z = select i1 %cond, i32 %a, i32 %load
441; CHECK-NOT:     cmov
442; CHECK:         ja [[FALSE_BB:.*]]
443; CHECK:         movl (%r{{..}}), %[[R:.*]]
444; CHECK:       [[FALSE_BB]]:
445; CHECK:         movl %[[R]], %eax
446; CHECK:         retq
447  ret i32 %z
448}
449
450; Test that we can convert a group of two cmovs with memory operands where one
451; uses the result of the other as part of the address.
452define i32 @test_cmov_memoperand_in_group_reuse_for_addr2(i32 %a, i32 %b, i32* %x, i32** %y) #0 {
453; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
454entry:
455  %cond = icmp ugt i32 %a, %b
456; CHECK:         cmpl
457  %load1 = load i32*, i32** %y
458  %p = select i1 %cond, i32* %x, i32* %load1
459  %load2 = load i32, i32* %p
460  %z = select i1 %cond, i32 %a, i32 %load2
461; CHECK-NOT:     cmov
462; CHECK:         ja [[FALSE_BB:.*]]
463; CHECK:         movq (%r{{..}}), %[[R1:.*]]
464; CHECK:         movl (%[[R1]]), %[[R2:.*]]
465; CHECK:       [[FALSE_BB]]:
466; CHECK:         movl %[[R2]], %eax
467; CHECK:         retq
468  ret i32 %z
469}
470
471; Test that we can convert a group of cmovs where only one has a memory
472; operand and where that memory operand's registers come from a prior cmov and
473; where that cmov gets *its* input from a prior cmov in the group.
474define i32 @test_cmov_memoperand_in_group_reuse_for_addr3(i32 %a, i32 %b, i32* %x, i32* %y, i32* %z) #0 {
475; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
476entry:
477  %cond = icmp ugt i32 %a, %b
478; CHECK:         cmpl
479  %p = select i1 %cond, i32* %x, i32* %y
480  %p2 = select i1 %cond, i32* %z, i32* %p
481  %load = load i32, i32* %p2
482  %r = select i1 %cond, i32 %a, i32 %load
483; CHECK-NOT:     cmov
484; CHECK:         ja [[FALSE_BB:.*]]
485; CHECK:         movl (%r{{..}}), %[[R:.*]]
486; CHECK:       [[FALSE_BB]]:
487; CHECK:         movl %[[R]], %eax
488; CHECK:         retq
489  ret i32 %r
490}
491
492attributes #0 = {"target-cpu"="x86-64"}
493