• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; REQUIRES: asserts
2; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X64
3; RUN: llc < %s -O3 -march=x86 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X32
4
5; @sharedidx is an unrolled variant of this loop:
6;  for (unsigned long i = 0; i < len; i += s) {
7;    c[i] = a[i] + b[i];
8;  }
9; where 's' cannot be folded into the addressing mode.
10;
11; This is not quite profitable to chain. But with -stress-ivchain, we
12; can form three address chains in place of the shared induction
13; variable.
14
15; X64: sharedidx:
16; X64: %for.body.preheader
17; X64-NOT: leal ({{.*}},4)
18; X64: %for.body.1
19
20; X32: sharedidx:
21; X32: %for.body.2
22; X32: add
23; X32: add
24; X32: add
25; X32: add
26; X32: add
27; X32: %for.body.3
; @sharedidx(a, b, c, s, len): test input consisting of four copies of the
; loop body  c[i] = a[i] + b[i],  where the index advances by the runtime
; stride %s after every copy. All three byte arrays are addressed with the
; same index value in each copy.
;
; The block labels for.body.1 / for.body.2 / for.body.3 are matched by the
; check lines at the top of this file, so they must not be renamed.
28define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
29entry:
    ; Skip the loop entirely when %len is zero.
30  %cmp8 = icmp eq i32 %len, 0
31  br i1 %cmp8, label %for.end, label %for.body
32
    ; Copy 0: index is 0 on entry, or %add5.3 when arriving from copy 3.
33for.body:                                         ; preds = %entry, %for.body.3
34  %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
    ; Load a[i] and b[i] as bytes and zero-extend both to i32.
35  %arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.09
36  %0 = load i8, i8* %arrayidx, align 1
37  %conv6 = zext i8 %0 to i32
38  %arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.09
39  %1 = load i8, i8* %arrayidx1, align 1
40  %conv27 = zext i8 %1 to i32
    ; Sum in i32, then truncate so only the low byte is stored to c[i].
41  %add = add nsw i32 %conv27, %conv6
42  %conv3 = trunc i32 %add to i8
43  %arrayidx4 = getelementptr inbounds i8, i8* %c, i32 %i.09
44  store i8 %conv3, i8* %arrayidx4, align 1
    ; Advance the index by the stride; continue only while it is
    ; (unsigned) below %len.
45  %add5 = add i32 %i.09, %s
46  %cmp = icmp ult i32 %add5, %len
47  br i1 %cmp, label %for.body.1, label %for.end
48
    ; Common exit, reached from the entry guard and from every copy.
49for.end:                                          ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
50  ret void
51
    ; Copy 1: same body as copy 0, indexed by %add5.
52for.body.1:                                       ; preds = %for.body
53  %arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %add5
54  %2 = load i8, i8* %arrayidx.1, align 1
55  %conv6.1 = zext i8 %2 to i32
56  %arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %add5
57  %3 = load i8, i8* %arrayidx1.1, align 1
58  %conv27.1 = zext i8 %3 to i32
59  %add.1 = add nsw i32 %conv27.1, %conv6.1
60  %conv3.1 = trunc i32 %add.1 to i8
61  %arrayidx4.1 = getelementptr inbounds i8, i8* %c, i32 %add5
62  store i8 %conv3.1, i8* %arrayidx4.1, align 1
63  %add5.1 = add i32 %add5, %s
64  %cmp.1 = icmp ult i32 %add5.1, %len
65  br i1 %cmp.1, label %for.body.2, label %for.end
66
    ; Copy 2: same body, indexed by %add5.1.
67for.body.2:                                       ; preds = %for.body.1
68  %arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %add5.1
69  %4 = load i8, i8* %arrayidx.2, align 1
70  %conv6.2 = zext i8 %4 to i32
71  %arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %add5.1
72  %5 = load i8, i8* %arrayidx1.2, align 1
73  %conv27.2 = zext i8 %5 to i32
74  %add.2 = add nsw i32 %conv27.2, %conv6.2
75  %conv3.2 = trunc i32 %add.2 to i8
76  %arrayidx4.2 = getelementptr inbounds i8, i8* %c, i32 %add5.1
77  store i8 %conv3.2, i8* %arrayidx4.2, align 1
78  %add5.2 = add i32 %add5.1, %s
79  %cmp.2 = icmp ult i32 %add5.2, %len
80  br i1 %cmp.2, label %for.body.3, label %for.end
81
    ; Copy 3: same body, indexed by %add5.2. Its updated index %add5.3 feeds
    ; the phi in for.body, and its branch is the loop back edge.
82for.body.3:                                       ; preds = %for.body.2
83  %arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %add5.2
84  %6 = load i8, i8* %arrayidx.3, align 1
85  %conv6.3 = zext i8 %6 to i32
86  %arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %add5.2
87  %7 = load i8, i8* %arrayidx1.3, align 1
88  %conv27.3 = zext i8 %7 to i32
89  %add.3 = add nsw i32 %conv27.3, %conv6.3
90  %conv3.3 = trunc i32 %add.3 to i8
91  %arrayidx4.3 = getelementptr inbounds i8, i8* %c, i32 %add5.2
92  store i8 %conv3.3, i8* %arrayidx4.3, align 1
93  %add5.3 = add i32 %add5.2, %s
94  %cmp.3 = icmp ult i32 %add5.3, %len
95  br i1 %cmp.3, label %for.body, label %for.end
96}
97