• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -runtime-memory-check-threshold=24 < %s | FileCheck %s
2
3target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
4
5; Check vectorization on an interleaved load group of factor 2 and an interleaved
6; store group of factor 2.
7
8; int AB[1024];
9; int CD[1024];
10;  void test_array_load2_store2(int C, int D) {
11;   for (int i = 0; i < 1024; i+=2) {
12;     int A = AB[i];
13;     int B = AB[i+1];
14;     CD[i] = A + C;
15;     CD[i+1] = B * D;
16;   }
17; }
18
19; CHECK-LABEL: @test_array_load2_store2(
20; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
21; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
22; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
23; CHECK: add nsw <4 x i32>
24; CHECK: mul nsw <4 x i32>
25; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
26; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4
27
28@AB = common global [1024 x i32] zeroinitializer, align 4    ; array the loop below loads from
29@CD = common global [1024 x i32] zeroinitializer, align 4    ; array the loop below stores to
30
31define void @test_array_load2_store2(i32 %C, i32 %D) {    ; scalar input loop: per iteration, two adjacent loads from @AB and two adjacent stores to @CD
32entry:
33  br label %for.body
34
35for.body:                                         ; preds = %for.body, %entry
36  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]    ; i: starts at 0, stepped by 2 below
37  %arrayidx0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %indvars.iv    ; &AB[i]
38  %tmp = load i32, i32* %arrayidx0, align 4    ; A = AB[i]
39  %tmp1 = or i64 %indvars.iv, 1    ; i | 1, which equals i + 1 because i is always even here
40  %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %tmp1    ; &AB[i+1]
41  %tmp2 = load i32, i32* %arrayidx1, align 4    ; B = AB[i+1]
42  %add = add nsw i32 %tmp, %C    ; A + C
43  %mul = mul nsw i32 %tmp2, %D    ; B * D
44  %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %indvars.iv    ; &CD[i]
45  store i32 %add, i32* %arrayidx2, align 4    ; CD[i] = A + C
46  %arrayidx3 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %tmp1    ; &CD[i+1]
47  store i32 %mul, i32* %arrayidx3, align 4    ; CD[i+1] = B * D
48  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2    ; i += 2
49  %cmp = icmp slt i64 %indvars.iv.next, 1024    ; continue while the next i is below 1024
50  br i1 %cmp, label %for.body, label %for.end
51
52for.end:                                          ; preds = %for.body
53  ret void
54}
55
56; int A[3072];
57; struct ST3 S[1024];
58; void test_struct_array_load3_store3() {
59;   int *ptr = A;
60;   for (int i = 0; i < 1024; i++) {
61;     int X1 = *ptr++;
62;     int X2 = *ptr++;
63;     int X3 = *ptr++;
64;     S[i].x = X1 + 1;
65;     S[i].y = X2 + 2;
66;     S[i].z = X3 + 3;
67;   }
68; }
69
70; CHECK-LABEL: @test_struct_array_load3_store3(
71; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
72; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
73; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
74; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
75; CHECK: add nsw <4 x i32> {{.*}}, <i32 1, i32 1, i32 1, i32 1>
76; CHECK: add nsw <4 x i32> {{.*}}, <i32 2, i32 2, i32 2, i32 2>
77; CHECK: add nsw <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3>
78; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
79; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
80; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
81; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* {{.*}}, align 4
82
83%struct.ST3 = type { i32, i32, i32 }    ; three i32 fields; the loop below addresses them as field 0, 1 and 2
84@A = common global [3072 x i32] zeroinitializer, align 4    ; flat i32 array walked by the pointer induction below
85@S = common global [1024 x %struct.ST3] zeroinitializer, align 4    ; array of structs the loop stores into
86
87define void @test_struct_array_load3_store3() {    ; scalar input loop: three consecutive loads via a moving pointer into @A, three field stores into @S[i]
88entry:
89  br label %for.body
90
91for.body:                                         ; preds = %for.body, %entry
92  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]    ; i: struct index into @S
93  %ptr.016 = phi i32* [ getelementptr inbounds ([3072 x i32], [3072 x i32]* @A, i64 0, i64 0), %entry ], [ %incdec.ptr2, %for.body ]    ; ptr: starts at &A[0], advances by 3 elements per iteration
94  %incdec.ptr = getelementptr inbounds i32, i32* %ptr.016, i64 1    ; ptr + 1
95  %tmp = load i32, i32* %ptr.016, align 4    ; X1 = ptr[0]
96  %incdec.ptr1 = getelementptr inbounds i32, i32* %ptr.016, i64 2    ; ptr + 2
97  %tmp1 = load i32, i32* %incdec.ptr, align 4    ; X2 = ptr[1]
98  %incdec.ptr2 = getelementptr inbounds i32, i32* %ptr.016, i64 3    ; ptr + 3, fed back into the pointer phi
99  %tmp2 = load i32, i32* %incdec.ptr1, align 4    ; X3 = ptr[2]
100  %add = add nsw i32 %tmp, 1    ; X1 + 1
101  %x = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 0    ; &S[i] field 0
102  store i32 %add, i32* %x, align 4
103  %add3 = add nsw i32 %tmp1, 2    ; X2 + 2
104  %y = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 1    ; &S[i] field 1
105  store i32 %add3, i32* %y, align 4
106  %add6 = add nsw i32 %tmp2, 3    ; X3 + 3
107  %z = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 2    ; &S[i] field 2
108  store i32 %add6, i32* %z, align 4
109  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1    ; i += 1
110  %exitcond = icmp eq i64 %indvars.iv.next, 1024    ; exit once the next i reaches 1024
111  br i1 %exitcond, label %for.end, label %for.body
112
113for.end:                                          ; preds = %for.body
114  ret void
115}
116
117; Check vectorization on an interleaved load group of factor 4.
118
119; struct ST4{
120;   int x;
121;   int y;
122;   int z;
123;   int w;
124; };
125; int test_struct_load4(struct ST4 *S) {
126;   int r = 0;
127;   for (int i = 0; i < 1024; i++) {
128;      r += S[i].x;
129;      r -= S[i].y;
130;      r += S[i].z;
131;      r -= S[i].w;
132;   }
133;   return r;
134; }
135
136; CHECK-LABEL: @test_struct_load4(
137; CHECK: %wide.vec = load <16 x i32>, <16 x i32>* {{.*}}, align 4
138; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
139; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
140; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
141; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
142; CHECK: add nsw <4 x i32>
143; CHECK: sub <4 x i32>
144; CHECK: add nsw <4 x i32>
145; CHECK: sub <4 x i32>
146
147%struct.ST4 = type { i32, i32, i32, i32 }    ; four i32 fields; also used by @test_struct_store4
148
149define i32 @test_struct_load4(%struct.ST4* nocapture readonly %S) {    ; scalar input loop: loads all four fields of S[i] and folds them into one running i32 that is returned
150entry:
151  br label %for.body
152
153for.body:                                         ; preds = %for.body, %entry
154  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]    ; i: struct index
155  %r.022 = phi i32 [ 0, %entry ], [ %sub8, %for.body ]    ; r: running result, starts at 0, carried across iterations
156  %x = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 0    ; &S[i] field 0
157  %tmp = load i32, i32* %x, align 4
158  %add = add nsw i32 %tmp, %r.022    ; r + field 0
159  %y = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 1    ; &S[i] field 1
160  %tmp1 = load i32, i32* %y, align 4
161  %sub = sub i32 %add, %tmp1    ; ... - field 1 (no nsw on the subtractions)
162  %z = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 2    ; &S[i] field 2
163  %tmp2 = load i32, i32* %z, align 4
164  %add5 = add nsw i32 %sub, %tmp2    ; ... + field 2
165  %w = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 3    ; &S[i] field 3
166  %tmp3 = load i32, i32* %w, align 4
167  %sub8 = sub i32 %add5, %tmp3    ; ... - field 3; this value feeds the r phi and the return
168  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1    ; i += 1
169  %exitcond = icmp eq i64 %indvars.iv.next, 1024    ; exit once the next i reaches 1024
170  br i1 %exitcond, label %for.end, label %for.body
171
172for.end:                                          ; preds = %for.body
173  ret i32 %sub8    ; final value of the running result
174}
175
176; Check vectorization on an interleaved store group of factor 4.
177
178; void test_struct_store4(int *A, struct ST4 *B) {
179;   int *ptr = A;
180;   for (int i = 0; i < 1024; i++) {
181;     int X = *ptr++;
182;     B[i].x = X + 1;
183;     B[i].y = X * 2;
184;     B[i].z = X + 3;
185;     B[i].w = X + 4;
186;   }
187; }
188
189; CHECK-LABEL: @test_struct_store4(
190; CHECK: %[[LD:.*]] = load <4 x i32>, <4 x i32>*
191; CHECK: add nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
192; CHECK: shl nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
193; CHECK: add nsw <4 x i32> %[[LD]], <i32 3, i32 3, i32 3, i32 3>
194; CHECK: add nsw <4 x i32> %[[LD]], <i32 4, i32 4, i32 4, i32 4>
195; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
196; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
197; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
198; CHECK: store <16 x i32> %interleaved.vec, <16 x i32>* {{.*}}, align 4
199
200define void @test_struct_store4(i32* noalias nocapture readonly %A, %struct.ST4* noalias nocapture %B) {    ; scalar input loop: one load through a moving pointer from %A, four field stores into B[i]; both pointers are noalias
201entry:
202  br label %for.body
203
204for.cond.cleanup:                                 ; preds = %for.body
205  ret void
206
207for.body:                                         ; preds = %for.body, %entry
208  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]    ; i: struct index into %B
209  %ptr.024 = phi i32* [ %A, %entry ], [ %incdec.ptr, %for.body ]    ; ptr: starts at %A, advances one i32 per iteration
210  %incdec.ptr = getelementptr inbounds i32, i32* %ptr.024, i64 1    ; ptr + 1, fed back into the pointer phi
211  %tmp = load i32, i32* %ptr.024, align 4    ; X = *ptr
212  %add = add nsw i32 %tmp, 1    ; X + 1
213  %x = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 0    ; &B[i] field 0
214  store i32 %add, i32* %x, align 4
215  %mul = shl nsw i32 %tmp, 1    ; X * 2, written as a left shift by 1
216  %y = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 1    ; &B[i] field 1
217  store i32 %mul, i32* %y, align 4
218  %add3 = add nsw i32 %tmp, 3    ; X + 3
219  %z = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 2    ; &B[i] field 2
220  store i32 %add3, i32* %z, align 4
221  %add6 = add nsw i32 %tmp, 4    ; X + 4
222  %w = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 3    ; &B[i] field 3
223  store i32 %add6, i32* %w, align 4
224  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1    ; i += 1
225  %exitcond = icmp eq i64 %indvars.iv.next, 1024    ; exit once the next i reaches 1024
226  br i1 %exitcond, label %for.cond.cleanup, label %for.body
227}
228
229; Check vectorization on a reverse interleaved load group of factor 2 and
230; a reverse interleaved store group of factor 2.
231
232; struct ST2 {
233;  int x;
234;  int y;
235; };
236;
237; void test_reversed_load2_store2(struct ST2 *A, struct ST2 *B) {
238;   for (int i = 1023; i >= 0; i--) {
239;     int a = A[i].x + i;  // interleaved load of index 0
240;     int b = A[i].y - i;  // interleaved load of index 1
241;     B[i].x = a;          // interleaved store of index 0
242;     B[i].y = b;          // interleaved store of index 1
243;   }
244; }
245
246; CHECK-LABEL: @test_reversed_load2_store2(
247; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
248; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
249; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
250; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
251; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
252; CHECK: add nsw <4 x i32>
253; CHECK: sub nsw <4 x i32>
254; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
255; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
256; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
257; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4
258
259%struct.ST2 = type { i32, i32 }    ; two i32 fields, addressed below as field 0 and field 1
260
261define void @test_reversed_load2_store2(%struct.ST2* noalias nocapture readonly %A, %struct.ST2* noalias nocapture %B) {    ; scalar input loop counting down from 1023: loads both fields of A[i], stores both fields of B[i]
262entry:
263  br label %for.body
264
265for.cond.cleanup:                                 ; preds = %for.body
266  ret void
267
268for.body:                                         ; preds = %for.body, %entry
269  %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %for.body ]    ; i: starts at 1023, decremented below
270  %x = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 0    ; &A[i] field 0
271  %tmp = load i32, i32* %x, align 4
272  %tmp1 = trunc i64 %indvars.iv to i32    ; i narrowed to i32 for the arithmetic below
273  %add = add nsw i32 %tmp, %tmp1    ; a = A[i] field 0 + i
274  %y = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 1    ; &A[i] field 1
275  %tmp2 = load i32, i32* %y, align 4
276  %sub = sub nsw i32 %tmp2, %tmp1    ; b = A[i] field 1 - i
277  %x5 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 0    ; &B[i] field 0
278  store i32 %add, i32* %x5, align 4    ; B[i] field 0 = a
279  %y8 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 1    ; &B[i] field 1
280  store i32 %sub, i32* %y8, align 4    ; B[i] field 1 = b
281  %indvars.iv.next = add nsw i64 %indvars.iv, -1    ; i -= 1
282  %cmp = icmp sgt i64 %indvars.iv, 0    ; tests the current i (not the decremented one), so i == 0 is the last iteration
283  br i1 %cmp, label %for.body, label %for.cond.cleanup
284}
285
286; Check vectorization on an interleaved load group of factor 2 with 1 gap
287; (missing the load of odd elements).
288
289; void even_load(int *A, int *B) {
290;  for (unsigned i = 0; i < 1024; i+=2)
291;     B[i/2] = A[i] * 2;
292; }
293
294; CHECK-LABEL: @even_load(
295; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
296; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
297; CHECK-NOT: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
298; CHECK: shl nsw <4 x i32> %strided.vec, <i32 1, i32 1, i32 1, i32 1>
299
300define void @even_load(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {    ; scalar input loop: reads A[i] for i = 0, 2, 4, ... and writes the doubled value to B[i/2]; A[i+1] is never read
301entry:
302  br label %for.body
303
304for.cond.cleanup:                                 ; preds = %for.body
305  ret void
306
307for.body:                                         ; preds = %for.body, %entry
308  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]    ; i: starts at 0, stepped by 2 below
309  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv    ; &A[i]
310  %tmp = load i32, i32* %arrayidx, align 4    ; A[i]
311  %mul = shl nsw i32 %tmp, 1    ; A[i] * 2, written as a left shift by 1
312  %tmp1 = lshr exact i64 %indvars.iv, 1    ; i / 2; "exact" holds because i is always even here
313  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %tmp1    ; &B[i/2]
314  store i32 %mul, i32* %arrayidx2, align 4    ; B[i/2] = A[i] * 2
315  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2    ; i += 2
316  %cmp = icmp ult i64 %indvars.iv.next, 1024    ; continue while the next i is below 1024 (unsigned compare)
317  br i1 %cmp, label %for.body, label %for.cond.cleanup
318}
319
320; Check vectorization on interleaved access groups identified from mixed
321; loads/stores.
322; void mixed_load2_store2(int *A, int *B) {
323;   for (unsigned i = 0; i < 1024; i+=2)  {
324;     B[i] = A[i] * A[i+1];
325;     B[i+1] = A[i] + A[i+1];
326;   }
327; }
328
329; CHECK-LABEL: @mixed_load2_store2(
330; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
331; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
332; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
333; CHECK: %interleaved.vec = shufflevector <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
334; CHECK: store <8 x i32> %interleaved.vec
335
336define void @mixed_load2_store2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {    ; scalar input loop: loads and stores are interleaved in program order, and the A[i]/A[i+1] pair is loaded twice
337entry:
338  br label %for.body
339
340for.cond.cleanup:                                 ; preds = %for.body
341  ret void
342
343for.body:                                         ; preds = %for.body, %entry
344  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]    ; i: starts at 0, stepped by 2 below
345  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv    ; &A[i]
346  %tmp = load i32, i32* %arrayidx, align 4    ; first load of A[i]
347  %tmp1 = or i64 %indvars.iv, 1    ; i | 1, which equals i + 1 because i is always even here
348  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %tmp1    ; &A[i+1]
349  %tmp2 = load i32, i32* %arrayidx2, align 4    ; first load of A[i+1]
350  %mul = mul nsw i32 %tmp2, %tmp    ; A[i+1] * A[i]
351  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv    ; &B[i]
352  store i32 %mul, i32* %arrayidx4, align 4    ; B[i] = product
353  %tmp3 = load i32, i32* %arrayidx, align 4    ; second load of A[i], placed after the store to B[i]
354  %tmp4 = load i32, i32* %arrayidx2, align 4    ; second load of A[i+1]
355  %add10 = add nsw i32 %tmp4, %tmp3    ; A[i+1] + A[i]
356  %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 %tmp1    ; &B[i+1]
357  store i32 %add10, i32* %arrayidx13, align 4    ; B[i+1] = sum
358  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2    ; i += 2
359  %cmp = icmp ult i64 %indvars.iv.next, 1024    ; continue while the next i is below 1024 (unsigned compare)
360  br i1 %cmp, label %for.body, label %for.cond.cleanup
361}
362
363; Check vectorization on interleaved access groups identified from mixed
364; loads/stores.
365; void mixed_load3_store3(int *A) {
366;   for (unsigned i = 0; i < 1024; i++)  {
367;     *A++ += i;
368;     *A++ += i;
369;     *A++ += i;
370;   }
371; }
372
373; CHECK-LABEL: @mixed_load3_store3(
374; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
375; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
376; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
377; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
378; CHECK: %interleaved.vec = shufflevector <8 x i32> %{{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
379; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* %{{.*}}, align 4
380
381define void @mixed_load3_store3(i32* nocapture %A) {    ; scalar input loop: three load/add/store updates in place on consecutive elements reached through a moving pointer
382entry:
383  br label %for.body
384
385for.cond.cleanup:                                 ; preds = %for.body
386  ret void
387
388for.body:                                         ; preds = %for.body, %entry
389  %i.013 = phi i32 [ 0, %entry ], [ %inc, %for.body ]    ; i: i32 counter, also the value added to each element
390  %A.addr.012 = phi i32* [ %A, %entry ], [ %incdec.ptr3, %for.body ]    ; p: starts at %A, advances by 3 elements per iteration
391  %incdec.ptr = getelementptr inbounds i32, i32* %A.addr.012, i64 1    ; p + 1
392  %tmp = load i32, i32* %A.addr.012, align 4    ; p[0]
393  %add = add i32 %tmp, %i.013    ; p[0] + i (no wrap flags on these adds)
394  store i32 %add, i32* %A.addr.012, align 4    ; p[0] updated in place
395  %incdec.ptr1 = getelementptr inbounds i32, i32* %A.addr.012, i64 2    ; p + 2
396  %tmp1 = load i32, i32* %incdec.ptr, align 4    ; p[1]
397  %add2 = add i32 %tmp1, %i.013    ; p[1] + i
398  store i32 %add2, i32* %incdec.ptr, align 4    ; p[1] updated in place
399  %incdec.ptr3 = getelementptr inbounds i32, i32* %A.addr.012, i64 3    ; p + 3, fed back into the pointer phi
400  %tmp2 = load i32, i32* %incdec.ptr1, align 4    ; p[2]
401  %add4 = add i32 %tmp2, %i.013    ; p[2] + i
402  store i32 %add4, i32* %incdec.ptr1, align 4    ; p[2] updated in place
403  %inc = add nuw nsw i32 %i.013, 1    ; i += 1
404  %exitcond = icmp eq i32 %inc, 1024    ; exit once the next i reaches 1024
405  br i1 %exitcond, label %for.cond.cleanup, label %for.body
406}
407
408; Check vectorization on interleaved access groups whose members have
409; different types (here, an i32 field and a float field).
410
411; struct IntFloat {
412;   int a;
413;   float b;
414; };
415;
416; int SA;
417; float SB;
418;
419; void int_float_struct(struct IntFloat *A) {
420;   int SumA;
421;   float SumB;
422;   for (unsigned i = 0; i < 1024; i++)  {
423;     SumA += A[i].a;
424;     SumB += A[i].b;
425;   }
426;   SA = SumA;
427;   SB = SumB;
428; }
429
430; CHECK-LABEL: @int_float_struct(
431; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
432; CHECK: %[[V0:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
433; CHECK: %[[V1:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
434; CHECK: bitcast <4 x i32> %[[V1]] to <4 x float>
435; CHECK: add nsw <4 x i32>
436; CHECK: fadd fast <4 x float>
437
438%struct.IntFloat = type { i32, float }    ; field 0 is i32, field 1 is float
439
440@SA = common global i32 0, align 4    ; receives the final integer sum
441@SB = common global float 0.000000e+00, align 4    ; receives the final float sum
442
443define void @int_float_struct(%struct.IntFloat* nocapture readonly %A) #0 {    ; scalar input loop: accumulates field 0 (i32) and field 1 (float) of A[i] into two separate running sums; carries attribute group #0
444entry:
445  br label %for.body
446
447for.cond.cleanup:                                 ; preds = %for.body
448  store i32 %add, i32* @SA, align 4    ; SA = integer sum from the last iteration
449  store float %add3, float* @SB, align 4    ; SB = float sum from the last iteration
450  ret void
451
452for.body:                                         ; preds = %for.body, %entry
453  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]    ; i: struct index
454  %SumB.014 = phi float [ undef, %entry ], [ %add3, %for.body ]    ; float accumulator; its initial value is undef
455  %SumA.013 = phi i32 [ undef, %entry ], [ %add, %for.body ]    ; integer accumulator; its initial value is undef
456  %a = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 0    ; &A[i] field 0
457  %tmp = load i32, i32* %a, align 4
458  %add = add nsw i32 %tmp, %SumA.013    ; SumA += A[i] field 0
459  %b = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 1    ; &A[i] field 1
460  %tmp1 = load float, float* %b, align 4
461  %add3 = fadd fast float %SumB.014, %tmp1    ; SumB += A[i] field 1, with the "fast" flag
462  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1    ; i += 1
463  %exitcond = icmp eq i64 %indvars.iv.next, 1024    ; exit once the next i reaches 1024
464  br i1 %exitcond, label %for.cond.cleanup, label %for.body
465}
466
467attributes #0 = { "unsafe-fp-math"="true" }    ; attribute group attached to @int_float_struct above
468