• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S | FileCheck %s
2; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s -check-prefix=WIDTH
3; RUN: opt -S -loop-vectorize -force-vector-width=4 < %s | FileCheck %s -check-prefix=RIGHTVF
4; RUN: opt -S -loop-vectorize -force-vector-width=8 < %s | FileCheck %s -check-prefix=WRONGVF
5
6target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"  ; little-endian, 64-bit pointers, native integer widths 8/16/32/64
7
8; Vectorization with dependence checks.
9
10; No plausible dependence - can be vectorized.
11;  for (i = 0; i < 1024; ++i)
12;    A[i] = A[i + 1] + 1;
13
14; CHECK-LABEL: @f1_vec(
15; CHECK: <2 x i32>
16
17define void @f1_vec(i32* %A) {  ; loop body: A[i] = A[i + 1] + 1 (the load index is one ahead of the store index)
18entry:
19  br label %for.body
20
21for.body:
22  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; i, starting at 0
23  %indvars.iv.next = add i32 %indvars.iv, 1  ; i + 1: next induction value, also used as the load index
24  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv.next  ; &A[i + 1]
25  %0 = load i32, i32* %arrayidx, align 4
26  %add1 = add nsw i32 %0, 1
27  %arrayidx3 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv  ; &A[i]
28  store i32 %add1, i32* %arrayidx3, align 4
29  %exitcond = icmp ne i32 %indvars.iv.next, 1024  ; keep looping until i + 1 reaches 1024
30  br i1 %exitcond, label %for.body, label %for.end
31
32for.end:
33  ret void
34}
35
36; Plausible dependence of distance 1 - can't be vectorized.
37;  for (i = 0; i < 1024; ++i)
38;    A[i+1] = A[i] + 1;
39
40; CHECK-LABEL: @f2_novec(
41; CHECK-NOT: <2 x i32>
42
43define void @f2_novec(i32* %A) {  ; loop body: A[i + 1] = A[i] + 1 (each iteration reads the element stored by the previous one)
44entry:
45  br label %for.body
46
47for.body:
48  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; i, starting at 0
49  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv  ; &A[i]
50  %0 = load i32, i32* %arrayidx, align 4
51  %add = add nsw i32 %0, 1
52  %indvars.iv.next = add i32 %indvars.iv, 1  ; i + 1: next induction value, also used as the store index
53  %arrayidx3 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv.next  ; &A[i + 1]
54  store i32 %add, i32* %arrayidx3, align 4
55  %exitcond = icmp ne i32 %indvars.iv.next, 1024  ; keep looping until i + 1 reaches 1024
56  br i1 %exitcond, label %for.body, label %for.end
57
58for.end:
59  ret void
60}
61
62; Plausible dependence of distance 2 - can be vectorized with a width of 2.
63;  for (i = 0; i < 1024; ++i)
64;    A[i+2] = A[i] + 1;
65
66; CHECK-LABEL: @f3_vec_len(
67; CHECK: <2 x i32>
68
69; WIDTH: f3_vec_len
70; WIDTH-NOT: <4 x i32>
71
72define void @f3_vec_len(i32* %A) {  ; loop body: A[i + 2] = A[i] + 1 (the store index is two ahead of the load index)
73entry:
74  br label %for.body
75
76for.body:
77  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; i, a 32-bit counter starting at 0
78  %idxprom = sext i32 %i.01 to i64  ; widen i to 64 bits for indexing
79  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom  ; &A[i]
80  %0 = load i32, i32* %arrayidx, align 4
81  %add = add nsw i32 %0, 1
82  %add1 = add nsw i32 %i.01, 2  ; i + 2, the store index
83  %idxprom2 = sext i32 %add1 to i64
84  %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %idxprom2  ; &A[i + 2]
85  store i32 %add, i32* %arrayidx3, align 4
86  %inc = add nsw i32 %i.01, 1
87  %cmp = icmp slt i32 %inc, 1024  ; continue while i + 1 < 1024 (signed compare)
88  br i1 %cmp, label %for.body, label %for.end
89
90for.end:
91  ret void
92}
93
94; Plausible dependence of distance 1 - cannot be vectorized (without reordering
95; accesses).
96;   for (i = 0; i < 1024; ++i) {
97;     B[i] = A[i];
98;     A[i] = B[i + 1];
99;   }
100
101; CHECK-LABEL: @f5(
102; CHECK-NOT: <2 x i32>
103
104define void @f5(i32*  %A, i32* %B) {  ; loop body: B[i] = A[i]; A[i] = B[i + 1]
105entry:
106  br label %for.body
107
108for.body:
109  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; i, starting at 0
110  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv  ; &A[i], used by the load below and the final store
111  %0 = load i32, i32* %arrayidx, align 4
112  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv  ; &B[i]
113  store i32 %0, i32* %arrayidx2, align 4  ; B[i] = A[i]
114  %indvars.iv.next = add nsw i64 %indvars.iv, 1
115  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv.next  ; &B[i + 1]
116  %1 = load i32, i32* %arrayidx4, align 4  ; reads B[i + 1], which the next iteration overwrites
117  store i32 %1, i32* %arrayidx, align 4  ; A[i] = B[i + 1]
118  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; exit test is done on the low 32 bits of i + 1
119  %exitcond = icmp ne i32 %lftr.wideiv, 1024
120  br i1 %exitcond, label %for.body, label %for.end
121
122for.end:
123  ret void
124}
125
126; Dependence through a phi node - must not vectorize.
127;   for (i = 0; i < 1024; ++i) {
128;     a[i+1] = tmp;
129;     tmp = a[i];
130;   }
131
132; CHECK-LABEL: @f6
133; CHECK-NOT: <2 x i32>
134
135define i32 @f6(i32* %a, i32 %tmp) {  ; loop body: a[i + 1] = tmp; tmp = a[i] (the value is carried between iterations by a phi)
136entry:
137  br label %for.body
138
139for.body:
140  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; i, starting at 0
141  %tmp.addr.08 = phi i32 [ %tmp, %entry ], [ %0, %for.body ]  ; tmp: the argument on entry, afterwards the value loaded in the previous iteration
142  %indvars.iv.next = add nsw i64 %indvars.iv, 1
143  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next  ; &a[i + 1]
144  store i32 %tmp.addr.08, i32* %arrayidx, align 4  ; a[i + 1] = tmp
145  %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv  ; &a[i]
146  %0 = load i32, i32* %arrayidx3, align 4  ; becomes tmp for the next iteration via the phi above
147  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; exit test is done on the low 32 bits of i + 1
148  %exitcond = icmp ne i32 %lftr.wideiv, 1024
149  br i1 %exitcond, label %for.body, label %for.end
150
151for.end:
152  ret i32 undef  ; the return value is not meaningful
153}
154
155; Don't vectorize true loop carried dependencies that are not a multiple of the
156; vector width.
157; Example:
158;   for (int i = ...; ++i) {
159;     a[i] = a[i-3] + ...;
160; It is a bad idea to vectorize this loop because store-load forwarding will not
161; happen.
162;
163
164; CHECK-LABEL: @nostoreloadforward(
165; CHECK-NOT: <2 x i32>
166
167define void @nostoreloadforward(i32* %A) {  ; loop body: A[i] = A[i - 3] + A[i + 4], with i starting at 16
168entry:
169  br label %for.body
170
171for.body:
172  %indvars.iv = phi i64 [ 16, %entry ], [ %indvars.iv.next, %for.body ]  ; i, starting at 16
173  %0 = add nsw i64 %indvars.iv, -3  ; i - 3
174  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0  ; &A[i - 3], an element stored three iterations earlier
175  %1 = load i32, i32* %arrayidx, align 4
176  %2 = add nsw i64 %indvars.iv, 4  ; i + 4
177  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %2  ; &A[i + 4]
178  %3 = load i32, i32* %arrayidx2, align 4
179  %add3 = add nsw i32 %3, %1
180  %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv  ; &A[i]
181  store i32 %add3, i32* %arrayidx5, align 4
182  %indvars.iv.next = add i64 %indvars.iv, 1
183  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; exit test is done on the low 32 bits of i + 1
184  %exitcond = icmp ne i32 %lftr.wideiv, 128  ; keep looping until i + 1 reaches 128
185  br i1 %exitcond, label %for.body, label %for.end
186
187for.end:
188  ret void
189}
190
191; Example:
192;   for (int i = ...; ++i) {
193;     a[i] = b[i];
194;     c[i] = a[i-3] + ...;
195; It is a bad idea to vectorize this loop because store-load forwarding will not
196; happen.
197;
198
199; CHECK-LABEL: @nostoreloadforward2(
200; CHECK-NOT: <2 x i32>
201
202define void @nostoreloadforward2(i32* noalias %A, i32* noalias %B, i32* noalias %C) {  ; loop body: A[i] = B[i]; C[i] = A[i - 3], with i starting at 16; all three pointers are noalias
203entry:
204  br label %for.body
205
206for.body:
207  %indvars.iv = phi i64 [ 16, %entry ], [ %indvars.iv.next, %for.body ]  ; i, starting at 16
208  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv  ; &B[i]
209  %0 = load i32, i32* %arrayidx, align 4
210  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv  ; &A[i]
211  store i32 %0, i32* %arrayidx2, align 4  ; A[i] = B[i]
212  %1 = add nsw i64 %indvars.iv, -3  ; i - 3
213  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %1  ; &A[i - 3], an element stored three iterations earlier
214  %2 = load i32, i32* %arrayidx4, align 4
215  %arrayidx6 = getelementptr inbounds i32, i32* %C, i64 %indvars.iv  ; &C[i]
216  store i32 %2, i32* %arrayidx6, align 4  ; C[i] = A[i - 3]
217  %indvars.iv.next = add i64 %indvars.iv, 1
218  %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; exit test is done on the low 32 bits of i + 1
219  %exitcond = icmp ne i32 %lftr.wideiv, 128  ; keep looping until i + 1 reaches 128
220  br i1 %exitcond, label %for.body, label %for.end
221
222for.end:
223  ret void
224}
225
226
227; Check the new calculation of the maximum safe dependence distance (in bits) that can be vectorized.
228; The previous behavior did not take into account that the stride was 2.
229; Therefore the maxVF was computed as 8 instead of 4, as the dependence distance here is 6 iterations, given by |N-(N-12)|/2.
230
231;#define M 32
232;#define N 2 * M
233;unsigned int a [N];
234;void pr34283(){
235;	unsigned int j=0;
236;   for (j = 0; j < M - 6; ++j)
237;    {
238;        a[N - 2 * j] = 69;
239;        a[N - 12 - 2 * j] = 7;
240;    }
241;
242;}
243
244; RIGHTVF-LABEL: @pr34283
245; RIGHTVF: <4 x i64>
246
247; WRONGVF-LABEL: @pr34283
248; WRONGVF-NOT: <8 x i64>
249
250@a = common local_unnamed_addr global [64 x i32] zeroinitializer, align 16  ; zero-initialized 64-element i32 array; the stores in @pr34283 target it
251
252; Function Attrs: norecurse nounwind uwtable
253define void @pr34283() local_unnamed_addr {  ; loop body: a[64 - 2*j] = 69; a[52 - 2*j] = 7, for j = 0 .. 25
254entry:
255  br label %for.body
256
257for.body:
258  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]  ; j, starting at 0
259  %0 = shl i64 %indvars.iv, 1  ; 2 * j, so both stores walk the array with a stride of two elements
260  %1 = sub nuw nsw i64 64, %0  ; 64 - 2*j; NOTE(review): at j = 0 this is 64, one past the last element of the [64 x i32] array - confirm this is intended
261  %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* @a, i64 0, i64 %1
262  store i32 69, i32* %arrayidx, align 8
263  %2 = sub nuw nsw i64 52, %0  ; 52 - 2*j, i.e. 12 elements below the first store's index
264  %arrayidx4 = getelementptr inbounds [64 x i32], [64 x i32]* @a, i64 0, i64 %2
265  store i32 7, i32* %arrayidx4, align 8
266  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
267  %exitcond = icmp eq i64 %indvars.iv.next, 26  ; exit once j + 1 equals 26
268  br i1 %exitcond, label %for.end, label %for.body
269
270for.end:
271  ret void
272}
273
274