• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -basic-aa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
3
4target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
5target triple = "x86_64-apple-macosx10.8.0"
6
7; int foo(int * restrict B,  int * restrict A, int n, int m) {
8;   B[0] = n * A[0] + m * A[0];
9;   B[1] = n * A[1] + m * A[1];
10;   B[2] = n * A[2] + m * A[2];
11;   B[3] = n * A[3] + m * A[3];
12;   return 0;
13; }
14
; Baseline case: B[i] = A[i] * (m + n) for i = 0..3, written as four scalar
; load / mul / store chains that all share the scalar %mul238. The function
; always returns 0, so none of the loaded scalars has a use outside the chains.
; The assertions below expect a single <4 x i32> load from A, a single vector
; mul whose second operand has %mul238 inserted into all four lanes, and a
; single <4 x i32> store to B.
15define i32 @foo(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) #0 {
16; CHECK-LABEL: @foo(
17; CHECK-NEXT:  entry:
18; CHECK-NEXT:    [[MUL238:%.*]] = add i32 [[M:%.*]], [[N:%.*]]
19; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1
20; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
21; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
22; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
23; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
24; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>*
25; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
26; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[MUL238]], i32 0
27; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1
28; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2
29; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3
30; CHECK-NEXT:    [[TMP6:%.*]] = mul <4 x i32> [[TMP1]], [[TMP5]]
31; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
32; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[B]] to <4 x i32>*
33; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
34; CHECK-NEXT:    ret i32 0
35;
36entry:
37  %0 = load i32, i32* %A, align 4
38  %mul238 = add i32 %m, %n ; despite the name this is the sum (m + n), the common factor of all four multiplies
39  %add = mul i32 %0, %mul238
40  store i32 %add, i32* %B, align 4
41  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1
42  %1 = load i32, i32* %arrayidx4, align 4
43  %add8 = mul i32 %1, %mul238
44  %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1
45  store i32 %add8, i32* %arrayidx9, align 4
46  %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 2
47  %2 = load i32, i32* %arrayidx10, align 4
48  %add14 = mul i32 %2, %mul238
49  %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2
50  store i32 %add14, i32* %arrayidx15, align 4
51  %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 3
52  %3 = load i32, i32* %arrayidx16, align 4
53  %add20 = mul i32 %3, %mul238
54  %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3
55  store i32 %add20, i32* %arrayidx21, align 4
56  ret i32 0
57}
58
59
60; int extr_user(int * restrict B,  int * restrict A, int n, int m) {
61;   B[0] = n * A[0] + m * A[0];
62;   B[1] = n * A[1] + m * A[1];
63;   B[2] = n * A[2] + m * A[2];
64;   B[3] = n * A[3] + m * A[3];
65;   return A[0];
66; }
67
; Same four load / mul / store chains as a plain B[i] = A[i] * (m + n) kernel,
; but the first loaded scalar (%0, i.e. A[0]) is additionally returned, giving
; it a second user besides its multiply. The assertions below still expect one
; <4 x i32> load, mul and store, and expect the returned value to be recovered
; from the vector load with an extractelement of lane 0.
68define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
69; CHECK-LABEL: @extr_user(
70; CHECK-NEXT:  entry:
71; CHECK-NEXT:    [[MUL238:%.*]] = add i32 [[M:%.*]], [[N:%.*]]
72; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1
73; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
74; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
75; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
76; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
77; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>*
78; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
79; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[MUL238]], i32 0
80; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1
81; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2
82; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3
83; CHECK-NEXT:    [[TMP6:%.*]] = mul <4 x i32> [[TMP1]], [[TMP5]]
84; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
85; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[B]] to <4 x i32>*
86; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
87; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
88; CHECK-NEXT:    ret i32 [[TMP8]]
89;
90entry:
91  %0 = load i32, i32* %A, align 4
92  %mul238 = add i32 %m, %n ; despite the name this is the sum (m + n), the common factor of all four multiplies
93  %add = mul i32 %0, %mul238
94  store i32 %add, i32* %B, align 4
95  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1
96  %1 = load i32, i32* %arrayidx4, align 4
97  %add8 = mul i32 %1, %mul238
98  %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1
99  store i32 %add8, i32* %arrayidx9, align 4
100  %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 2
101  %2 = load i32, i32* %arrayidx10, align 4
102  %add14 = mul i32 %2, %mul238
103  %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2
104  store i32 %add14, i32* %arrayidx15, align 4
105  %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 3
106  %3 = load i32, i32* %arrayidx16, align 4
107  %add20 = mul i32 %3, %mul238
108  %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3
109  store i32 %add20, i32* %arrayidx21, align 4
110  ret i32 %0  ; <--- %0 (A[0]) has multiple users: the multiply producing %add and this ret
111}
112
113; In this example we have an external user that is not the first element in the vector.
; Computes B[i] = A[i] * (m + n) for i = 0..3 with four scalar load / mul /
; store chains and returns the second loaded scalar (%1, i.e. A[1]), which
; therefore has a second user besides its multiply. The assertions below
; expect one <4 x i32> load, mul and store, and expect the returned value to
; be recovered from the vector load with an extractelement of lane 1.
114define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
115; CHECK-LABEL: @extr_user1(
116; CHECK-NEXT:  entry:
117; CHECK-NEXT:    [[MUL238:%.*]] = add i32 [[M:%.*]], [[N:%.*]]
118; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 1
119; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 1
120; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
121; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
122; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
123; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A]] to <4 x i32>*
124; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
125; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[MUL238]], i32 0
126; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[MUL238]], i32 1
127; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[MUL238]], i32 2
128; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[MUL238]], i32 3
129; CHECK-NEXT:    [[TMP6:%.*]] = mul <4 x i32> [[TMP1]], [[TMP5]]
130; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
131; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[B]] to <4 x i32>*
132; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
133; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
134; CHECK-NEXT:    ret i32 [[TMP8]]
135;
136entry:
137  %0 = load i32, i32* %A, align 4
138  %mul238 = add i32 %m, %n ; despite the name this is the sum (m + n), the common factor of all four multiplies
139  %add = mul i32 %0, %mul238
140  store i32 %add, i32* %B, align 4
141  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1
142  %1 = load i32, i32* %arrayidx4, align 4
143  %add8 = mul i32 %1, %mul238
144  %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1
145  store i32 %add8, i32* %arrayidx9, align 4
146  %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 2
147  %2 = load i32, i32* %arrayidx10, align 4
148  %add14 = mul i32 %2, %mul238
149  %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2
150  store i32 %add14, i32* %arrayidx15, align 4
151  %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 3
152  %3 = load i32, i32* %arrayidx16, align 4
153  %add20 = mul i32 %3, %mul238
154  %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3
155  store i32 %add20, i32* %arrayidx21, align 4
156  ret i32 %1  ; <--- %1 (A[1]) has multiple users: the multiply producing %add8 and this ret
157}
158