• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: opt -S -slp-vectorizer -dce -instcombine < %s | FileCheck %s --check-prefix=GENERIC
2; RUN: opt -S -mcpu=kryo -slp-vectorizer -dce -instcombine < %s | FileCheck %s --check-prefix=KRYO
3
4target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
5target triple = "aarch64--linux-gnu"
6
7; These tests check that we vectorize the index calculations in the
8; gather-reduce pattern shown below. We check cases in which the index
9; subtraction is performed in i32 and in i64.
10;
11; int gather_reduce_8x16(unsigned short *a, unsigned short *b, unsigned short *g, int n) {
12;   int sum = 0;
13;   for (int i = 0; i < n ; ++i) {
14;     sum += g[*a++ - b[0]]; sum += g[*a++ - b[1]];
15;     sum += g[*a++ - b[2]]; sum += g[*a++ - b[3]];
16;     sum += g[*a++ - b[4]]; sum += g[*a++ - b[5]];
17;     sum += g[*a++ - b[6]]; sum += g[*a++ - b[7]];
18;   }
19;   return sum;
20; }
21
22; GENERIC-LABEL: @gather_reduce_8x16_i32
23;
24; GENERIC: [[L:%[a-zA-Z0-9.]+]] = load <8 x i16>
25; GENERIC: zext <8 x i16> [[L]] to <8 x i32>
26; GENERIC: [[S:%[a-zA-Z0-9.]+]] = sub nsw <8 x i32>
27; GENERIC: [[X:%[a-zA-Z0-9.]+]] = extractelement <8 x i32> [[S]]
28; GENERIC: sext i32 [[X]] to i64
29;
; Test input for the GENERIC-LABEL checks on @gather_reduce_8x16_i32.
; Each loop iteration handles eight lanes. For lane k it loads a[k] and b[k]
; as i16, zero-extends both to i32, subtracts them in i32, and uses the
; difference to index %g; the i16 loaded from %g is zero-extended and added
; to the running i32 sum. The %a pointer advances by eight elements per
; iteration, while %b is always read at offsets 0..7.
; Returns the accumulated sum, or 0 when %n is not positive.
30define i32 @gather_reduce_8x16_i32(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %g, i32 %n) {
31entry:
  ; Skip the loop entirely unless %n > 0 (signed compare).
32  %cmp.99 = icmp sgt i32 %n, 0
33  br i1 %cmp.99, label %for.body.preheader, label %for.cond.cleanup
34
35for.body.preheader:
36  br label %for.body
37
38for.cond.cleanup.loopexit:
39  br label %for.cond.cleanup
40
41for.cond.cleanup:
  ; Result is 0 when the loop was skipped, otherwise the final sum %add66.
42  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add66, %for.cond.cleanup.loopexit ]
43  ret i32 %sum.0.lcssa
44
45for.body:
  ; Loop-carried state: iteration counter, running sum, current position in a.
46  %i.0103 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
47  %sum.0102 = phi i32 [ %add66, %for.body ], [ 0, %for.body.preheader ]
48  %a.addr.0101 = phi i16* [ %incdec.ptr58, %for.body ], [ %a, %for.body.preheader ]
  ; Lane 0: sum += g[a[0] - b[0]]. %incdec.ptr / %incdec.ptr1 are the
  ; addresses of a[1] / b[1], consumed by lane 1.
49  %incdec.ptr = getelementptr inbounds i16, i16* %a.addr.0101, i64 1
50  %0 = load i16, i16* %a.addr.0101, align 2
51  %conv = zext i16 %0 to i32
52  %incdec.ptr1 = getelementptr inbounds i16, i16* %b, i64 1
53  %1 = load i16, i16* %b, align 2
54  %conv2 = zext i16 %1 to i32
55  %sub = sub nsw i32 %conv, %conv2
56  %arrayidx = getelementptr inbounds i16, i16* %g, i32 %sub
57  %2 = load i16, i16* %arrayidx, align 2
58  %conv3 = zext i16 %2 to i32
59  %add = add nsw i32 %conv3, %sum.0102
  ; Lane 1: sum += g[a[1] - b[1]].
60  %incdec.ptr4 = getelementptr inbounds i16, i16* %a.addr.0101, i64 2
61  %3 = load i16, i16* %incdec.ptr, align 2
62  %conv5 = zext i16 %3 to i32
63  %incdec.ptr6 = getelementptr inbounds i16, i16* %b, i64 2
64  %4 = load i16, i16* %incdec.ptr1, align 2
65  %conv7 = zext i16 %4 to i32
66  %sub8 = sub nsw i32 %conv5, %conv7
67  %arrayidx10 = getelementptr inbounds i16, i16* %g, i32 %sub8
68  %5 = load i16, i16* %arrayidx10, align 2
69  %conv11 = zext i16 %5 to i32
70  %add12 = add nsw i32 %add, %conv11
  ; Lane 2: sum += g[a[2] - b[2]].
71  %incdec.ptr13 = getelementptr inbounds i16, i16* %a.addr.0101, i64 3
72  %6 = load i16, i16* %incdec.ptr4, align 2
73  %conv14 = zext i16 %6 to i32
74  %incdec.ptr15 = getelementptr inbounds i16, i16* %b, i64 3
75  %7 = load i16, i16* %incdec.ptr6, align 2
76  %conv16 = zext i16 %7 to i32
77  %sub17 = sub nsw i32 %conv14, %conv16
78  %arrayidx19 = getelementptr inbounds i16, i16* %g, i32 %sub17
79  %8 = load i16, i16* %arrayidx19, align 2
80  %conv20 = zext i16 %8 to i32
81  %add21 = add nsw i32 %add12, %conv20
  ; Lane 3: sum += g[a[3] - b[3]].
82  %incdec.ptr22 = getelementptr inbounds i16, i16* %a.addr.0101, i64 4
83  %9 = load i16, i16* %incdec.ptr13, align 2
84  %conv23 = zext i16 %9 to i32
85  %incdec.ptr24 = getelementptr inbounds i16, i16* %b, i64 4
86  %10 = load i16, i16* %incdec.ptr15, align 2
87  %conv25 = zext i16 %10 to i32
88  %sub26 = sub nsw i32 %conv23, %conv25
89  %arrayidx28 = getelementptr inbounds i16, i16* %g, i32 %sub26
90  %11 = load i16, i16* %arrayidx28, align 2
91  %conv29 = zext i16 %11 to i32
92  %add30 = add nsw i32 %add21, %conv29
  ; Lane 4: sum += g[a[4] - b[4]].
93  %incdec.ptr31 = getelementptr inbounds i16, i16* %a.addr.0101, i64 5
94  %12 = load i16, i16* %incdec.ptr22, align 2
95  %conv32 = zext i16 %12 to i32
96  %incdec.ptr33 = getelementptr inbounds i16, i16* %b, i64 5
97  %13 = load i16, i16* %incdec.ptr24, align 2
98  %conv34 = zext i16 %13 to i32
99  %sub35 = sub nsw i32 %conv32, %conv34
100  %arrayidx37 = getelementptr inbounds i16, i16* %g, i32 %sub35
101  %14 = load i16, i16* %arrayidx37, align 2
102  %conv38 = zext i16 %14 to i32
103  %add39 = add nsw i32 %add30, %conv38
  ; Lane 5: sum += g[a[5] - b[5]].
104  %incdec.ptr40 = getelementptr inbounds i16, i16* %a.addr.0101, i64 6
105  %15 = load i16, i16* %incdec.ptr31, align 2
106  %conv41 = zext i16 %15 to i32
107  %incdec.ptr42 = getelementptr inbounds i16, i16* %b, i64 6
108  %16 = load i16, i16* %incdec.ptr33, align 2
109  %conv43 = zext i16 %16 to i32
110  %sub44 = sub nsw i32 %conv41, %conv43
111  %arrayidx46 = getelementptr inbounds i16, i16* %g, i32 %sub44
112  %17 = load i16, i16* %arrayidx46, align 2
113  %conv47 = zext i16 %17 to i32
114  %add48 = add nsw i32 %add39, %conv47
  ; Lane 6: sum += g[a[6] - b[6]].
115  %incdec.ptr49 = getelementptr inbounds i16, i16* %a.addr.0101, i64 7
116  %18 = load i16, i16* %incdec.ptr40, align 2
117  %conv50 = zext i16 %18 to i32
118  %incdec.ptr51 = getelementptr inbounds i16, i16* %b, i64 7
119  %19 = load i16, i16* %incdec.ptr42, align 2
120  %conv52 = zext i16 %19 to i32
121  %sub53 = sub nsw i32 %conv50, %conv52
122  %arrayidx55 = getelementptr inbounds i16, i16* %g, i32 %sub53
123  %20 = load i16, i16* %arrayidx55, align 2
124  %conv56 = zext i16 %20 to i32
125  %add57 = add nsw i32 %add48, %conv56
  ; Lane 7: sum += g[a[7] - b[7]]. %incdec.ptr58 (a + 8) becomes the a
  ; pointer for the next iteration via the %a.addr.0101 phi.
126  %incdec.ptr58 = getelementptr inbounds i16, i16* %a.addr.0101, i64 8
127  %21 = load i16, i16* %incdec.ptr49, align 2
128  %conv59 = zext i16 %21 to i32
129  %22 = load i16, i16* %incdec.ptr51, align 2
130  %conv61 = zext i16 %22 to i32
131  %sub62 = sub nsw i32 %conv59, %conv61
132  %arrayidx64 = getelementptr inbounds i16, i16* %g, i32 %sub62
133  %23 = load i16, i16* %arrayidx64, align 2
134  %conv65 = zext i16 %23 to i32
135  %add66 = add nsw i32 %add57, %conv65
  ; Bump the iteration counter and leave the loop once it equals %n.
136  %inc = add nuw nsw i32 %i.0103, 1
137  %exitcond = icmp eq i32 %inc, %n
138  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
139}
140
141; KRYO-LABEL: @gather_reduce_8x16_i64
142;
143; KRYO: [[L:%[a-zA-Z0-9.]+]] = load <8 x i16>
144; KRYO: zext <8 x i16> [[L]] to <8 x i32>
145; KRYO: [[S:%[a-zA-Z0-9.]+]] = sub nsw <8 x i32>
146; KRYO: [[X:%[a-zA-Z0-9.]+]] = extractelement <8 x i32> [[S]]
147; KRYO: sext i32 [[X]] to i64
148;
; Test input for the KRYO-LABEL checks on @gather_reduce_8x16_i64.
; Each loop iteration handles eight lanes. For lane k it loads a[k] and b[k]
; as i16, zero-extends both to i64, subtracts them in i64, and uses the i64
; difference to index %g; the i16 loaded from %g is zero-extended to i32 and
; added to the running i32 sum. The %a pointer advances by eight elements per
; iteration, while %b is always read at offsets 0..7.
; Although the source arithmetic here is i64, the KRYO check lines for this
; function expect a `sub nsw <8 x i32>` whose extracted elements are then
; sign-extended to i64.
; Returns the accumulated sum, or 0 when %n is not positive.
149define i32 @gather_reduce_8x16_i64(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %g, i32 %n) {
150entry:
  ; Skip the loop entirely unless %n > 0 (signed compare).
151  %cmp.99 = icmp sgt i32 %n, 0
152  br i1 %cmp.99, label %for.body.preheader, label %for.cond.cleanup
153
154for.body.preheader:
155  br label %for.body
156
157for.cond.cleanup.loopexit:
158  br label %for.cond.cleanup
159
160for.cond.cleanup:
  ; Result is 0 when the loop was skipped, otherwise the final sum %add66.
161  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add66, %for.cond.cleanup.loopexit ]
162  ret i32 %sum.0.lcssa
163
164for.body:
  ; Loop-carried state: iteration counter, running sum, current position in a.
165  %i.0103 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
166  %sum.0102 = phi i32 [ %add66, %for.body ], [ 0, %for.body.preheader ]
167  %a.addr.0101 = phi i16* [ %incdec.ptr58, %for.body ], [ %a, %for.body.preheader ]
  ; Lane 0: sum += g[a[0] - b[0]], index computed in i64. %incdec.ptr /
  ; %incdec.ptr1 are the addresses of a[1] / b[1], consumed by lane 1.
168  %incdec.ptr = getelementptr inbounds i16, i16* %a.addr.0101, i64 1
169  %0 = load i16, i16* %a.addr.0101, align 2
170  %conv = zext i16 %0 to i64
171  %incdec.ptr1 = getelementptr inbounds i16, i16* %b, i64 1
172  %1 = load i16, i16* %b, align 2
173  %conv2 = zext i16 %1 to i64
174  %sub = sub nsw i64 %conv, %conv2
175  %arrayidx = getelementptr inbounds i16, i16* %g, i64 %sub
176  %2 = load i16, i16* %arrayidx, align 2
177  %conv3 = zext i16 %2 to i32
178  %add = add nsw i32 %conv3, %sum.0102
  ; Lane 1: sum += g[a[1] - b[1]].
179  %incdec.ptr4 = getelementptr inbounds i16, i16* %a.addr.0101, i64 2
180  %3 = load i16, i16* %incdec.ptr, align 2
181  %conv5 = zext i16 %3 to i64
182  %incdec.ptr6 = getelementptr inbounds i16, i16* %b, i64 2
183  %4 = load i16, i16* %incdec.ptr1, align 2
184  %conv7 = zext i16 %4 to i64
185  %sub8 = sub nsw i64 %conv5, %conv7
186  %arrayidx10 = getelementptr inbounds i16, i16* %g, i64 %sub8
187  %5 = load i16, i16* %arrayidx10, align 2
188  %conv11 = zext i16 %5 to i32
189  %add12 = add nsw i32 %add, %conv11
  ; Lane 2: sum += g[a[2] - b[2]].
190  %incdec.ptr13 = getelementptr inbounds i16, i16* %a.addr.0101, i64 3
191  %6 = load i16, i16* %incdec.ptr4, align 2
192  %conv14 = zext i16 %6 to i64
193  %incdec.ptr15 = getelementptr inbounds i16, i16* %b, i64 3
194  %7 = load i16, i16* %incdec.ptr6, align 2
195  %conv16 = zext i16 %7 to i64
196  %sub17 = sub nsw i64 %conv14, %conv16
197  %arrayidx19 = getelementptr inbounds i16, i16* %g, i64 %sub17
198  %8 = load i16, i16* %arrayidx19, align 2
199  %conv20 = zext i16 %8 to i32
200  %add21 = add nsw i32 %add12, %conv20
  ; Lane 3: sum += g[a[3] - b[3]].
201  %incdec.ptr22 = getelementptr inbounds i16, i16* %a.addr.0101, i64 4
202  %9 = load i16, i16* %incdec.ptr13, align 2
203  %conv23 = zext i16 %9 to i64
204  %incdec.ptr24 = getelementptr inbounds i16, i16* %b, i64 4
205  %10 = load i16, i16* %incdec.ptr15, align 2
206  %conv25 = zext i16 %10 to i64
207  %sub26 = sub nsw i64 %conv23, %conv25
208  %arrayidx28 = getelementptr inbounds i16, i16* %g, i64 %sub26
209  %11 = load i16, i16* %arrayidx28, align 2
210  %conv29 = zext i16 %11 to i32
211  %add30 = add nsw i32 %add21, %conv29
  ; Lane 4: sum += g[a[4] - b[4]].
212  %incdec.ptr31 = getelementptr inbounds i16, i16* %a.addr.0101, i64 5
213  %12 = load i16, i16* %incdec.ptr22, align 2
214  %conv32 = zext i16 %12 to i64
215  %incdec.ptr33 = getelementptr inbounds i16, i16* %b, i64 5
216  %13 = load i16, i16* %incdec.ptr24, align 2
217  %conv34 = zext i16 %13 to i64
218  %sub35 = sub nsw i64 %conv32, %conv34
219  %arrayidx37 = getelementptr inbounds i16, i16* %g, i64 %sub35
220  %14 = load i16, i16* %arrayidx37, align 2
221  %conv38 = zext i16 %14 to i32
222  %add39 = add nsw i32 %add30, %conv38
  ; Lane 5: sum += g[a[5] - b[5]].
223  %incdec.ptr40 = getelementptr inbounds i16, i16* %a.addr.0101, i64 6
224  %15 = load i16, i16* %incdec.ptr31, align 2
225  %conv41 = zext i16 %15 to i64
226  %incdec.ptr42 = getelementptr inbounds i16, i16* %b, i64 6
227  %16 = load i16, i16* %incdec.ptr33, align 2
228  %conv43 = zext i16 %16 to i64
229  %sub44 = sub nsw i64 %conv41, %conv43
230  %arrayidx46 = getelementptr inbounds i16, i16* %g, i64 %sub44
231  %17 = load i16, i16* %arrayidx46, align 2
232  %conv47 = zext i16 %17 to i32
233  %add48 = add nsw i32 %add39, %conv47
  ; Lane 6: sum += g[a[6] - b[6]].
234  %incdec.ptr49 = getelementptr inbounds i16, i16* %a.addr.0101, i64 7
235  %18 = load i16, i16* %incdec.ptr40, align 2
236  %conv50 = zext i16 %18 to i64
237  %incdec.ptr51 = getelementptr inbounds i16, i16* %b, i64 7
238  %19 = load i16, i16* %incdec.ptr42, align 2
239  %conv52 = zext i16 %19 to i64
240  %sub53 = sub nsw i64 %conv50, %conv52
241  %arrayidx55 = getelementptr inbounds i16, i16* %g, i64 %sub53
242  %20 = load i16, i16* %arrayidx55, align 2
243  %conv56 = zext i16 %20 to i32
244  %add57 = add nsw i32 %add48, %conv56
  ; Lane 7: sum += g[a[7] - b[7]]. %incdec.ptr58 (a + 8) becomes the a
  ; pointer for the next iteration via the %a.addr.0101 phi.
245  %incdec.ptr58 = getelementptr inbounds i16, i16* %a.addr.0101, i64 8
246  %21 = load i16, i16* %incdec.ptr49, align 2
247  %conv59 = zext i16 %21 to i64
248  %22 = load i16, i16* %incdec.ptr51, align 2
249  %conv61 = zext i16 %22 to i64
250  %sub62 = sub nsw i64 %conv59, %conv61
251  %arrayidx64 = getelementptr inbounds i16, i16* %g, i64 %sub62
252  %23 = load i16, i16* %arrayidx64, align 2
253  %conv65 = zext i16 %23 to i32
254  %add66 = add nsw i32 %add57, %conv65
  ; Bump the iteration counter and leave the loop once it equals %n.
255  %inc = add nuw nsw i32 %i.0103, 1
256  %exitcond = icmp eq i32 %inc, %n
257  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
258}
259