• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s
2
3target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
4
5; Float pattern:
6;   Check vectorization of reduction code which has an fadd instruction after
7;   an fcmp instruction which compares an array element and 0.
8;
9; float fcmp_0_fadd_select1(float * restrict x, const int N) {
10;   float sum = 0.
11;   for (int i = 0; i < N; ++i)
12;     if (x[i] > (float)0.)
13;       sum += x[i];
14;   return sum;
15; }
16
17; CHECK-LABEL: @fcmp_0_fadd_select1(
18; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], zeroinitializer
19; CHECK: %[[V3:.*]] = fadd fast <4 x float> %[[V0]], %[[V2:.*]]
20; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
21define float @fcmp_0_fadd_select1(float* noalias %x, i32 %N) nounwind readonly {
22entry:
23  %has.iters = icmp sgt i32 %N, 0                 ; guard: the loop is entered only when N > 0
24  br i1 %has.iters, label %loop.ph, label %loop.exit
25
26loop.ph:                                          ; preds = %entry
27  %trip.count = zext i32 %N to i64                ; widen N to the i64 induction type
28  br label %loop.body
29
30loop.body:                                        ; preds = %loop.ph, %loop.body
31  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.body ]
32  %acc = phi float [ 0.000000e+00, %loop.ph ], [ %acc.next, %loop.body ]
33  %elt.ptr = getelementptr inbounds float, float* %x, i64 %iv
34  %elt = load float, float* %elt.ptr, align 4     ; %elt = x[i]
35  %take = fcmp fast ogt float %elt, 0.000000e+00
36  %acc.add = fadd fast float %elt, %acc
37  %acc.next = select i1 %take, float %acc.add, float %acc   ; keep the old sum when the compare is false
38  %iv.next = add nuw nsw i64 %iv, 1
39  %done = icmp eq i64 %iv.next, %trip.count
40  br i1 %done, label %loop.exit, label %loop.body
41
42loop.exit:                                        ; preds = %loop.body, %entry
43  %result = phi float [ 0.000000e+00, %entry ], [ %acc.next, %loop.body ]   ; 0.0 when the loop was skipped
44  ret float %result
45}
46
47; Double pattern:
48;   Check vectorization of reduction code which has an fadd instruction after
49;   an fcmp instruction which compares an array element and 0.
50;
51; double fcmp_0_fadd_select2(double * restrict x, const int N) {
52;   double sum = 0.
53;   for (int i = 0; i < N; ++i)
54;     if (x[i] > 0.)
55;       sum += x[i];
56;   return sum;
57; }
58
59; CHECK-LABEL: @fcmp_0_fadd_select2(
60; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], zeroinitializer
61; CHECK: %[[V3:.*]] = fadd fast <4 x double> %[[V0]], %[[V2:.*]]
62; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
63define double @fcmp_0_fadd_select2(double* noalias %x, i32 %N) nounwind readonly {
64entry:
65  %has.iters = icmp sgt i32 %N, 0                 ; guard: the loop is entered only when N > 0
66  br i1 %has.iters, label %loop.ph, label %loop.exit
67
68loop.ph:                                          ; preds = %entry
69  %trip.count = zext i32 %N to i64                ; widen N to the i64 induction type
70  br label %loop.body
71
72loop.body:                                        ; preds = %loop.ph, %loop.body
73  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.body ]
74  %acc = phi double [ 0.000000e+00, %loop.ph ], [ %acc.next, %loop.body ]
75  %elt.ptr = getelementptr inbounds double, double* %x, i64 %iv
76  %elt = load double, double* %elt.ptr, align 4   ; %elt = x[i]
77  %take = fcmp fast ogt double %elt, 0.000000e+00
78  %acc.add = fadd fast double %elt, %acc
79  %acc.next = select i1 %take, double %acc.add, double %acc   ; keep the old sum when the compare is false
80  %iv.next = add nuw nsw i64 %iv, 1
81  %done = icmp eq i64 %iv.next, %trip.count
82  br i1 %done, label %loop.exit, label %loop.body
83
84loop.exit:                                        ; preds = %loop.body, %entry
85  %result = phi double [ 0.000000e+00, %entry ], [ %acc.next, %loop.body ]   ; 0.0 when the loop was skipped
86  ret double %result
87}
88
89; Float pattern:
90;   Check vectorization of reduction code which has an fadd instruction after
91;   an fcmp instruction which compares an array element and a floating-point
92;   value.
93;
94; float fcmp_val_fadd_select1(float * restrict x, float y, const int N) {
95;   float sum = 0.
96;   for (int i = 0; i < N; ++i)
97;     if (x[i] > y)
98;       sum += x[i];
99;   return sum;
100; }
101
102; CHECK-LABEL: @fcmp_val_fadd_select1(
103; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], %broadcast.splat
104; CHECK: %[[V3:.*]] = fadd fast <4 x float> %[[V0]], %[[V2:.*]]
105; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
106define float @fcmp_val_fadd_select1(float* noalias %x, float %y, i32 %N) nounwind readonly {
107entry:
108  %has.iters = icmp sgt i32 %N, 0                ; guard: the loop is entered only when N > 0
109  br i1 %has.iters, label %loop.ph, label %loop.exit
110
111loop.ph:                                          ; preds = %entry
112  %trip.count = zext i32 %N to i64               ; widen N to the i64 induction type
113  br label %loop.body
114
115loop.body:                                        ; preds = %loop.ph, %loop.body
116  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.body ]
117  %acc = phi float [ 0.000000e+00, %loop.ph ], [ %acc.next, %loop.body ]
118  %elt.ptr = getelementptr inbounds float, float* %x, i64 %iv
119  %elt = load float, float* %elt.ptr, align 4    ; %elt = x[i]
120  %take = fcmp fast ogt float %elt, %y           ; compare against the loop-invariant argument %y
121  %acc.add = fadd fast float %elt, %acc
122  %acc.next = select i1 %take, float %acc.add, float %acc   ; keep the old sum when the compare is false
123  %iv.next = add nuw nsw i64 %iv, 1
124  %done = icmp eq i64 %iv.next, %trip.count
125  br i1 %done, label %loop.exit, label %loop.body
126
127loop.exit:                                        ; preds = %loop.body, %entry
128  %result = phi float [ 0.000000e+00, %entry ], [ %acc.next, %loop.body ]   ; 0.0 when the loop was skipped
129  ret float %result
130}
131
132; Double pattern:
133;   Check vectorization of reduction code which has an fadd instruction after
134;   an fcmp instruction which compares an array element and a floating-point
135;   value.
136;
137; double fcmp_val_fadd_select2(double * restrict x, double y, const int N) {
138;   double sum = 0.
139;   for (int i = 0; i < N; ++i)
140;     if (x[i] > y)
141;       sum += x[i];
142;   return sum;
143; }
144
145; CHECK-LABEL: @fcmp_val_fadd_select2(
146; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], %broadcast.splat
147; CHECK: %[[V3:.*]] = fadd fast <4 x double> %[[V0]], %[[V2:.*]]
148; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
149define double @fcmp_val_fadd_select2(double* noalias %x, double %y, i32 %N) nounwind readonly {
150entry:
151  %has.iters = icmp sgt i32 %N, 0                ; guard: the loop is entered only when N > 0
152  br i1 %has.iters, label %loop.ph, label %loop.exit
153
154loop.ph:                                          ; preds = %entry
155  %trip.count = zext i32 %N to i64               ; widen N to the i64 induction type
156  br label %loop.body
157
158loop.body:                                        ; preds = %loop.ph, %loop.body
159  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.body ]
160  %acc = phi double [ 0.000000e+00, %loop.ph ], [ %acc.next, %loop.body ]
161  %elt.ptr = getelementptr inbounds double, double* %x, i64 %iv
162  %elt = load double, double* %elt.ptr, align 4  ; %elt = x[i]
163  %take = fcmp fast ogt double %elt, %y          ; compare against the loop-invariant argument %y
164  %acc.add = fadd fast double %elt, %acc
165  %acc.next = select i1 %take, double %acc.add, double %acc   ; keep the old sum when the compare is false
166  %iv.next = add nuw nsw i64 %iv, 1
167  %done = icmp eq i64 %iv.next, %trip.count
168  br i1 %done, label %loop.exit, label %loop.body
169
170loop.exit:                                        ; preds = %loop.body, %entry
171  %result = phi double [ 0.000000e+00, %entry ], [ %acc.next, %loop.body ]   ; 0.0 when the loop was skipped
172  ret double %result
173}
174
175; Float pattern:
176;   Check vectorization of reduction code which has an fadd instruction after
177;   an fcmp instruction which compares an array element and another array
178;   element.
179;
180; float fcmp_array_elm_fadd_select1(float * restrict x, float * restrict y,
181;                                   const int N) {
182;   float sum = 0.
183;   for (int i = 0; i < N; ++i)
184;     if (x[i] > y[i])
185;       sum += x[i];
186;   return sum;
187; }
188
189; CHECK-LABEL: @fcmp_array_elm_fadd_select1(
190; CHECK: %[[V2:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], %[[V1:.*]]
191; CHECK: %[[V4:.*]] = fadd fast <4 x float> %[[V0]], %[[V3:.*]]
192; CHECK: select <4 x i1> %[[V2]], <4 x float> %[[V4]], <4 x float> %[[V3]]
193define float @fcmp_array_elm_fadd_select1(float* noalias %x, float* noalias %y, i32 %N) nounwind readonly {
194entry:
195  %has.iters = icmp sgt i32 %N, 0                ; guard: the loop is entered only when N > 0
196  br i1 %has.iters, label %loop.ph, label %loop.exit
197
198loop.ph:                                          ; preds = %entry
199  %trip.count = zext i32 %N to i64               ; widen N to the i64 induction type
200  br label %loop.body
201
202loop.body:                                        ; preds = %loop.body, %loop.ph
203  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.body ]
204  %acc = phi float [ 0.000000e+00, %loop.ph ], [ %acc.next, %loop.body ]
205  %x.ptr = getelementptr inbounds float, float* %x, i64 %iv
206  %x.elt = load float, float* %x.ptr, align 4    ; %x.elt = x[i]
207  %y.ptr = getelementptr inbounds float, float* %y, i64 %iv
208  %y.elt = load float, float* %y.ptr, align 4    ; %y.elt = y[i]
209  %take = fcmp fast ogt float %x.elt, %y.elt
210  %acc.add = fadd fast float %x.elt, %acc
211  %acc.next = select i1 %take, float %acc.add, float %acc   ; keep the old sum when the compare is false
212  %iv.next = add nuw nsw i64 %iv, 1
213  %done = icmp eq i64 %iv.next, %trip.count
214  br i1 %done, label %loop.exit, label %loop.body
215
216loop.exit:                                        ; preds = %loop.body, %entry
217  %result = phi float [ 0.000000e+00, %entry ], [ %acc.next, %loop.body ]   ; 0.0 when the loop was skipped
218  ret float %result
219}
220
221; Double pattern:
222;   Check vectorization of reduction code which has an fadd instruction after
223;   an fcmp instruction which compares an array element and another array
224;   element.
225;
226; double fcmp_array_elm_fadd_select2(double * restrict x, double * restrict y,
227;                                    const int N) {
228;   double sum = 0.
229;   for (int i = 0; i < N; ++i)
230;     if (x[i] > y[i])
231;       sum += x[i];
232;   return sum;
233; }
234
235; CHECK-LABEL: @fcmp_array_elm_fadd_select2(
236; CHECK: %[[V2:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], %[[V1:.*]]
237; CHECK: %[[V4:.*]] = fadd fast <4 x double> %[[V0]], %[[V3:.*]]
238; CHECK: select <4 x i1> %[[V2]], <4 x double> %[[V4]], <4 x double> %[[V3]]
239define double @fcmp_array_elm_fadd_select2(double* noalias %x, double* noalias %y, i32 %N) nounwind readonly {
240entry:
241  %has.iters = icmp sgt i32 %N, 0                ; guard: the loop is entered only when N > 0
242  br i1 %has.iters, label %loop.ph, label %loop.exit
243
244loop.ph:                                          ; preds = %entry
245  %trip.count = zext i32 %N to i64               ; widen N to the i64 induction type
246  br label %loop.body
247
248loop.body:                                        ; preds = %loop.body, %loop.ph
249  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.body ]
250  %acc = phi double [ 0.000000e+00, %loop.ph ], [ %acc.next, %loop.body ]
251  %x.ptr = getelementptr inbounds double, double* %x, i64 %iv
252  %x.elt = load double, double* %x.ptr, align 4  ; %x.elt = x[i]
253  %y.ptr = getelementptr inbounds double, double* %y, i64 %iv
254  %y.elt = load double, double* %y.ptr, align 4  ; %y.elt = y[i]
255  %take = fcmp fast ogt double %x.elt, %y.elt
256  %acc.add = fadd fast double %x.elt, %acc
257  %acc.next = select i1 %take, double %acc.add, double %acc   ; keep the old sum when the compare is false
258  %iv.next = add nuw nsw i64 %iv, 1
259  %done = icmp eq i64 %iv.next, %trip.count
260  br i1 %done, label %loop.exit, label %loop.body
261
262loop.exit:                                        ; preds = %loop.body, %entry
263  %result = phi double [ 0.000000e+00, %entry ], [ %acc.next, %loop.body ]   ; 0.0 when the loop was skipped
264  ret double %result
265}
266
267; Float pattern:
268;   Check vectorization of reduction code which has an fsub instruction after
269;   an fcmp instruction which compares an array element and 0.
270;
271; float fcmp_0_fsub_select1(float * restrict x, const int N) {
272;   float sum = 0.
273;   for (int i = 0; i < N; ++i)
274;     if (x[i] > (float)0.)
275;       sum -= x[i];
276;   return sum;
277; }
278
279; CHECK-LABEL: @fcmp_0_fsub_select1(
280; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], zeroinitializer
281; CHECK: %[[V3:.*]] = fsub fast <4 x float> %[[V2:.*]], %[[V0]]
282; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
283define float @fcmp_0_fsub_select1(float* noalias %x, i32 %N) nounwind readonly {
284entry:
285  %has.iters = icmp sgt i32 %N, 0                ; guard: the loop is entered only when N > 0
286  br i1 %has.iters, label %loop.ph, label %loop.exit
287
288loop.ph:                                          ; preds = %entry
289  %trip.count = zext i32 %N to i64               ; widen N to the i64 induction type
290  br label %loop.body
291
292loop.body:                                        ; preds = %loop.body, %loop.ph
293  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.body ]
294  %acc = phi float [ 0.000000e+00, %loop.ph ], [ %acc.next, %loop.body ]
295  %elt.ptr = getelementptr inbounds float, float* %x, i64 %iv
296  %elt = load float, float* %elt.ptr, align 4    ; %elt = x[i]
297  %take = fcmp fast ogt float %elt, 0.000000e+00
298  %acc.sub = fsub fast float %acc, %elt          ; accumulator is the left operand: acc - x[i]
299  %acc.next = select i1 %take, float %acc.sub, float %acc   ; keep the old value when the compare is false
300  %iv.next = add nuw nsw i64 %iv, 1
301  %done = icmp eq i64 %iv.next, %trip.count
302  br i1 %done, label %loop.exit, label %loop.body
303
304loop.exit:                                        ; preds = %loop.body, %entry
305  %result = phi float [ 0.000000e+00, %entry ], [ %acc.next, %loop.body ]   ; 0.0 when the loop was skipped
306  ret float %result
307}
308
309; Float pattern:
310;   Check that the loop is not vectorized if the fp-instruction has no fast-math property.
311; float fcmp_0_fsub_select1_novectorize(float * restrict x, const int N) {
312;   float sum = 0.
313;   for (int i = 0; i < N; ++i)
314;     if (x[i] > (float)0.)
315;       sum -= x[i];
316;   return sum;
317; }
318
319; CHECK-LABEL: @fcmp_0_fsub_select1_novectorize(
320; CHECK-NOT: <4 x float>
321define float @fcmp_0_fsub_select1_novectorize(float* noalias %x, i32 %N) nounwind readonly {
322entry:
323  %has.iters = icmp sgt i32 %N, 0                ; guard: the loop is entered only when N > 0
324  br i1 %has.iters, label %loop.ph, label %loop.exit
325
326loop.ph:                                          ; preds = %entry
327  %trip.count = zext i32 %N to i64               ; widen N to the i64 induction type
328  br label %loop.body
329
330loop.body:                                        ; preds = %loop.body, %loop.ph
331  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.body ]
332  %acc = phi float [ 0.000000e+00, %loop.ph ], [ %acc.next, %loop.body ]
333  %elt.ptr = getelementptr inbounds float, float* %x, i64 %iv
334  %elt = load float, float* %elt.ptr, align 4    ; %elt = x[i]
335  %take = fcmp ogt float %elt, 0.000000e+00      ; deliberately no fast-math flags
336  %acc.sub = fsub float %acc, %elt               ; deliberately no fast-math flags
337  %acc.next = select i1 %take, float %acc.sub, float %acc   ; keep the old value when the compare is false
338  %iv.next = add nuw nsw i64 %iv, 1
339  %done = icmp eq i64 %iv.next, %trip.count
340  br i1 %done, label %loop.exit, label %loop.body
341
342loop.exit:                                        ; preds = %loop.body, %entry
343  %result = phi float [ 0.000000e+00, %entry ], [ %acc.next, %loop.body ]   ; 0.0 when the loop was skipped
344  ret float %result
345}
346
347; Double pattern:
348;   Check vectorization of reduction code which has an fsub instruction after
349;   an fcmp instruction which compares an array element and 0.
350;
351; double fcmp_0_fsub_select2(double * restrict x, const int N) {
352;   double sum = 0.
353;   for (int i = 0; i < N; ++i)
354;     if (x[i] > 0.)
355;       sum -= x[i];
356;   return sum;
357; }
358
359; CHECK-LABEL: @fcmp_0_fsub_select2(
360; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], zeroinitializer
361; CHECK: %[[V3:.*]] = fsub fast <4 x double> %[[V2:.*]], %[[V0]]
362; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
363define double @fcmp_0_fsub_select2(double* noalias %x, i32 %N) nounwind readonly {
364entry:
365  %has.iters = icmp sgt i32 %N, 0                ; guard: the loop is entered only when N > 0
366  br i1 %has.iters, label %loop.ph, label %loop.exit
367
368loop.ph:                                          ; preds = %entry
369  %trip.count = zext i32 %N to i64               ; widen N to the i64 induction type
370  br label %loop.body
371
372loop.body:                                        ; preds = %loop.body, %loop.ph
373  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.body ]
374  %acc = phi double [ 0.000000e+00, %loop.ph ], [ %acc.next, %loop.body ]
375  %elt.ptr = getelementptr inbounds double, double* %x, i64 %iv
376  %elt = load double, double* %elt.ptr, align 4  ; %elt = x[i]
377  %take = fcmp fast ogt double %elt, 0.000000e+00
378  %acc.sub = fsub fast double %acc, %elt         ; accumulator is the left operand: acc - x[i]
379  %acc.next = select i1 %take, double %acc.sub, double %acc   ; keep the old value when the compare is false
380  %iv.next = add nuw nsw i64 %iv, 1
381  %done = icmp eq i64 %iv.next, %trip.count
382  br i1 %done, label %loop.exit, label %loop.body
383
384loop.exit:                                        ; preds = %loop.body, %entry
385  %result = phi double [ 0.000000e+00, %entry ], [ %acc.next, %loop.body ]   ; 0.0 when the loop was skipped
386  ret double %result
387}
388
389; Double pattern:
390;   Check that the loop is not vectorized if the fp-instruction has no fast-math property.
391;
392; double fcmp_0_fsub_select2_notvectorize(double * restrict x, const int N) {
393;   double sum = 0.
394;   for (int i = 0; i < N; ++i)
395;     if (x[i] > 0.)
396;       sum -= x[i];
397;   return sum;
398; }
399
400; CHECK-LABEL: @fcmp_0_fsub_select2_notvectorize(
401; CHECK-NOT: <4 x double>
402define double @fcmp_0_fsub_select2_notvectorize(double* noalias %x, i32 %N) nounwind readonly {
403entry:
404  %has.iters = icmp sgt i32 %N, 0                ; guard: the loop is entered only when N > 0
405  br i1 %has.iters, label %loop.ph, label %loop.exit
406
407loop.ph:                                          ; preds = %entry
408  %trip.count = zext i32 %N to i64               ; widen N to the i64 induction type
409  br label %loop.body
410
411loop.body:                                        ; preds = %loop.body, %loop.ph
412  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.body ]
413  %acc = phi double [ 0.000000e+00, %loop.ph ], [ %acc.next, %loop.body ]
414  %elt.ptr = getelementptr inbounds double, double* %x, i64 %iv
415  %elt = load double, double* %elt.ptr, align 4  ; %elt = x[i]
416  %take = fcmp ogt double %elt, 0.000000e+00     ; deliberately no fast-math flags
417  %acc.sub = fsub double %acc, %elt              ; deliberately no fast-math flags
418  %acc.next = select i1 %take, double %acc.sub, double %acc   ; keep the old value when the compare is false
419  %iv.next = add nuw nsw i64 %iv, 1
420  %done = icmp eq i64 %iv.next, %trip.count
421  br i1 %done, label %loop.exit, label %loop.body
422
423loop.exit:                                        ; preds = %loop.body, %entry
424  %result = phi double [ 0.000000e+00, %entry ], [ %acc.next, %loop.body ]   ; 0.0 when the loop was skipped
425  ret double %result
426}
427
428; Float pattern:
429;   Check vectorization of reduction code which has an fmul instruction after
430;   an fcmp instruction which compares an array element and 0.
431;
432; float fcmp_0_fmult_select1(float * restrict x, const int N) {
433;   float sum = 0.
434;   for (int i = 0; i < N; ++i)
435;     if (x[i] > (float)0.)
436;       sum *= x[i];
437;   return sum;
438; }
439
440; CHECK-LABEL: @fcmp_0_fmult_select1(
441; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], zeroinitializer
442; CHECK: %[[V3:.*]] = fmul fast <4 x float> %[[V2:.*]], %[[V0]]
443; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
444define float @fcmp_0_fmult_select1(float* noalias %x, i32 %N) nounwind readonly {
445entry:
446  %has.iters = icmp sgt i32 %N, 0                ; guard: the loop is entered only when N > 0
447  br i1 %has.iters, label %loop.ph, label %loop.exit
448
449loop.ph:                                          ; preds = %entry
450  %trip.count = zext i32 %N to i64               ; widen N to the i64 induction type
451  br label %loop.body
452
453loop.body:                                        ; preds = %loop.body, %loop.ph
454  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.body ]
455  %acc = phi float [ 0.000000e+00, %loop.ph ], [ %acc.next, %loop.body ]
456  %elt.ptr = getelementptr inbounds float, float* %x, i64 %iv
457  %elt = load float, float* %elt.ptr, align 4    ; %elt = x[i]
458  %take = fcmp fast ogt float %elt, 0.000000e+00
459  %acc.mul = fmul fast float %acc, %elt
460  %acc.next = select i1 %take, float %acc.mul, float %acc   ; keep the old value when the compare is false
461  %iv.next = add nuw nsw i64 %iv, 1
462  %done = icmp eq i64 %iv.next, %trip.count
463  br i1 %done, label %loop.exit, label %loop.body
464
465loop.exit:                                        ; preds = %loop.body, %entry
466  %result = phi float [ 0.000000e+00, %entry ], [ %acc.next, %loop.body ]   ; 0.0 when the loop was skipped
467  ret float %result
468}
469
470; Float pattern:
471;   Check that the loop is not vectorized if the fp-instruction has no fast-math property.
472;
473; float fcmp_0_fmult_select1_notvectorize(float * restrict x, const int N) {
474;   float sum = 0.
475;   for (int i = 0; i < N; ++i)
476;     if (x[i] > (float)0.)
477;       sum *= x[i];
478;   return sum;
479; }
480
481; CHECK-LABEL: @fcmp_0_fmult_select1_notvectorize(
482; CHECK-NOT: <4 x float>
483define float @fcmp_0_fmult_select1_notvectorize(float* noalias %x, i32 %N) nounwind readonly {
484entry:
485  %has.iters = icmp sgt i32 %N, 0                ; guard: the loop is entered only when N > 0
486  br i1 %has.iters, label %loop.ph, label %loop.exit
487
488loop.ph:                                          ; preds = %entry
489  %trip.count = zext i32 %N to i64               ; widen N to the i64 induction type
490  br label %loop.body
491
492loop.body:                                        ; preds = %loop.body, %loop.ph
493  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.body ]
494  %acc = phi float [ 0.000000e+00, %loop.ph ], [ %acc.next, %loop.body ]
495  %elt.ptr = getelementptr inbounds float, float* %x, i64 %iv
496  %elt = load float, float* %elt.ptr, align 4    ; %elt = x[i]
497  %take = fcmp ogt float %elt, 0.000000e+00      ; deliberately no fast-math flags
498  %acc.mul = fmul float %acc, %elt               ; deliberately no fast-math flags
499  %acc.next = select i1 %take, float %acc.mul, float %acc   ; keep the old value when the compare is false
500  %iv.next = add nuw nsw i64 %iv, 1
501  %done = icmp eq i64 %iv.next, %trip.count
502  br i1 %done, label %loop.exit, label %loop.body
503
504loop.exit:                                        ; preds = %loop.body, %entry
505  %result = phi float [ 0.000000e+00, %entry ], [ %acc.next, %loop.body ]   ; 0.0 when the loop was skipped
506  ret float %result
507}
508
509; Double pattern:
510;   Check vectorization of reduction code which has an fmul instruction after
511;   an fcmp instruction which compares an array element and 0.
512;
513; double fcmp_0_fmult_select2(double * restrict x, const int N) {
514;   double sum = 0.
515;   for (int i = 0; i < N; ++i)
516;     if (x[i] > 0.)
517;       sum *= x[i];
518;   return sum;
519; }
520
521; CHECK-LABEL: @fcmp_0_fmult_select2(
522; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], zeroinitializer
523; CHECK: %[[V3:.*]] = fmul fast <4 x double> %[[V2:.*]], %[[V0]]
524; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
525define double @fcmp_0_fmult_select2(double* noalias %x, i32 %N) nounwind readonly {
526entry:
527  %has.iters = icmp sgt i32 %N, 0                ; guard: the loop is entered only when N > 0
528  br i1 %has.iters, label %loop.ph, label %loop.exit
529
530loop.ph:                                          ; preds = %entry
531  %trip.count = zext i32 %N to i64               ; widen N to the i64 induction type
532  br label %loop.body
533
534loop.body:                                        ; preds = %loop.body, %loop.ph
535  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.body ]
536  %acc = phi double [ 0.000000e+00, %loop.ph ], [ %acc.next, %loop.body ]
537  %elt.ptr = getelementptr inbounds double, double* %x, i64 %iv
538  %elt = load double, double* %elt.ptr, align 4  ; %elt = x[i]
539  %take = fcmp fast ogt double %elt, 0.000000e+00
540  %acc.mul = fmul fast double %acc, %elt
541  %acc.next = select i1 %take, double %acc.mul, double %acc   ; keep the old value when the compare is false
542  %iv.next = add nuw nsw i64 %iv, 1
543  %done = icmp eq i64 %iv.next, %trip.count
544  br i1 %done, label %loop.exit, label %loop.body
545
546loop.exit:                                        ; preds = %loop.body, %entry
547  %result = phi double [ 0.000000e+00, %entry ], [ %acc.next, %loop.body ]   ; 0.0 when the loop was skipped
548  ret double %result
549}
550
551; Double pattern:
552;   Check that the loop is not vectorized if the fp-instruction has no fast-math property.
553;
554; double fcmp_0_fmult_select2_notvectorize(double * restrict x, const int N) {
555;   double sum = 0.
556;   for (int i = 0; i < N; ++i)
557;     if (x[i] > 0.)
558;       sum *= x[i];
559;   return sum;
560; }
561
562; CHECK-LABEL: @fcmp_0_fmult_select2_notvectorize(
563; CHECK-NOT: <4 x double>
564define double @fcmp_0_fmult_select2_notvectorize(double* noalias %x, i32 %N) nounwind readonly {
565entry:
566  %has.iters = icmp sgt i32 %N, 0                ; guard: the loop is entered only when N > 0
567  br i1 %has.iters, label %loop.ph, label %loop.exit
568
569loop.ph:                                          ; preds = %entry
570  %trip.count = zext i32 %N to i64               ; widen N to the i64 induction type
571  br label %loop.body
572
573loop.body:                                        ; preds = %loop.body, %loop.ph
574  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop.body ]
575  %acc = phi double [ 0.000000e+00, %loop.ph ], [ %acc.next, %loop.body ]
576  %elt.ptr = getelementptr inbounds double, double* %x, i64 %iv
577  %elt = load double, double* %elt.ptr, align 4  ; %elt = x[i]
578  %take = fcmp ogt double %elt, 0.000000e+00     ; deliberately no fast-math flags
579  %acc.mul = fmul double %acc, %elt              ; deliberately no fast-math flags
580  %acc.next = select i1 %take, double %acc.mul, double %acc   ; keep the old value when the compare is false
581  %iv.next = add nuw nsw i64 %iv, 1
582  %done = icmp eq i64 %iv.next, %trip.count
583  br i1 %done, label %loop.exit, label %loop.body
584
585loop.exit:                                        ; preds = %loop.body, %entry
586  %result = phi double [ 0.000000e+00, %entry ], [ %acc.next, %loop.body ]   ; 0.0 when the loop was skipped
587  ret double %result
588}
589
590; Float multi pattern
591;   Check vectorisation of reduction code with a pair of selects to different
592;   fadd patterns.
593;
594; float fcmp_multi(float *a, int n) {
595;   float sum=0.0;
596;   for (int i=0;i<n;i++) {
597;     if (a[i]>1.0)
598;       sum+=a[i];
599;     else if (a[i]<3.0)
600;       sum+=2*a[i];
601;     else
602;       sum+=3*a[i];
603;   }
604;   return sum;
605; }
606
607; CHECK-LABEL: @fcmp_multi(
608; CHECK: %[[C1:.*]] = fcmp ogt <4 x float> %[[V0:.*]], <float 1.000000e+00,
609; CHECK: %[[C2:.*]] = fcmp olt <4 x float> %[[V0]], <float 3.000000e+00,
610; CHECK-DAG: %[[M1:.*]] = fmul fast <4 x float> %[[V0]], <float 3.000000e+00,
611; CHECK-DAG: %[[M2:.*]] = fmul fast <4 x float> %[[V0]], <float 2.000000e+00,
612; CHECK: %[[C11:.*]] = xor <4 x i1> %[[C1]], <i1 true,
613; CHECK-DAG: %[[C12:.*]] = and <4 x i1> %[[C2]], %[[C11]]
614; CHECK-DAG: %[[C21:.*]] = xor <4 x i1> %[[C2]], <i1 true,
615; CHECK: %[[C22:.*]] = and <4 x i1> %[[C21]], %[[C11]]
616; CHECK: %[[S1:.*]] = select <4 x i1> %[[C22]], <4 x float> %[[M1]], <4 x float> %[[M2]]
617; CHECK: %[[S2:.*]] = select <4 x i1> %[[C1]], <4 x float> %[[V0]], <4 x float> %[[S1]]
618; CHECK: fadd fast <4 x float> %[[S2]],
619define float @fcmp_multi(float* nocapture readonly %a, i32 %n) nounwind readonly {
620entry:
621  %cmp10 = icmp sgt i32 %n, 0                    ; guard: the loop is entered only when n > 0
622  br i1 %cmp10, label %for.body.preheader, label %for.end
623
624for.body.preheader:                               ; preds = %entry
625  %wide.trip.count = zext i32 %n to i64          ; widen n to the i64 induction type
626  br label %for.body
627
628for.body:                                         ; preds = %for.inc, %for.body.preheader
629  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
630  %sum.011 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ]   ; running sum
631  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
632  %0 = load float, float* %arrayidx, align 4     ; %0 = a[i]
633  %cmp1 = fcmp ogt float %0, 1.000000e+00        ; compare carries no fast-math flags
634  br i1 %cmp1, label %for.inc, label %if.else    ; a[i] > 1.0 goes straight to the add with a[i] unscaled
635
636if.else:                                          ; preds = %for.body
637  %cmp8 = fcmp olt float %0, 3.000000e+00        ; compare carries no fast-math flags
638  br i1 %cmp8, label %if.then10, label %if.else14
639
640if.then10:                                        ; preds = %if.else
641  %mul = fmul fast float %0, 2.000000e+00        ; addend = 2 * a[i]
642  br label %for.inc
643
644if.else14:                                        ; preds = %if.else
645  %mul17 = fmul fast float %0, 3.000000e+00      ; addend = 3 * a[i]
646  br label %for.inc
647
648for.inc:                                          ; preds = %for.body, %if.else14, %if.then10
649  %.pn = phi float [ %mul, %if.then10 ], [ %mul17, %if.else14 ], [ %0, %for.body ]   ; addend chosen by the path taken
650  %sum.1 = fadd fast float %.pn, %sum.011        ; single fadd feeds the reduction on every path
651  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
652  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
653  br i1 %exitcond, label %for.end, label %for.body
654
655for.end:                                          ; preds = %for.inc, %entry
656  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ]   ; 0.0 when the loop was skipped
657  ret float %sum.0.lcssa
658}
659
660; Float fadd + fsub patterns
661;   Check vectorisation of reduction code with a pair of selects to different
662;   instructions { fadd, fsub } but equivalent (change in constant).
663;
664; float fcmp_fadd_fsub(float *a, int n) {
665;   float sum=0.0;
666;   for (int i=0;i<n;i++) {
667;     if (a[i]>1.0)
668;       sum+=a[i];
669;     else if (a[i]<3.0)
670;       sum-=a[i];
671;   }
672;   return sum;
673; }
674
675; CHECK-LABEL: @fcmp_fadd_fsub(
676; CHECK: %[[C1:.*]] = fcmp ogt <4 x float> %[[V0:.*]], <float 1.000000e+00,
677; CHECK: %[[C2:.*]] = fcmp olt <4 x float> %[[V0]], <float 3.000000e+00,
678; CHECK-DAG: %[[SUB:.*]] = fsub fast <4 x float>
679; CHECK-DAG: %[[ADD:.*]] = fadd fast <4 x float>
680; CHECK: %[[C11:.*]] = xor <4 x i1> %[[C1]], <i1 true,
681; CHECK-DAG: %[[C12:.*]] = and <4 x i1> %[[C2]], %[[C11]]
682; CHECK-DAG: %[[C21:.*]] = xor <4 x i1> %[[C2]], <i1 true,
683; CHECK: %[[C22:.*]] = and <4 x i1> %[[C21]], %[[C11]]
684; CHECK: %[[S1:.*]] = select <4 x i1> %[[C12]], <4 x float> %[[SUB]], <4 x float> %[[ADD]]
685; CHECK: %[[S2:.*]] = select <4 x i1> %[[C22]], {{.*}} <4 x float> %[[S1]]
686define float @fcmp_fadd_fsub(float* nocapture readonly %a, i32 %n) nounwind readonly {
687entry:
688  %cmp9 = icmp sgt i32 %n, 0                     ; guard: the loop is entered only when n > 0
689  br i1 %cmp9, label %for.body.preheader, label %for.end
690
691for.body.preheader:                               ; preds = %entry
692  %wide.trip.count = zext i32 %n to i64          ; widen n to the i64 induction type
693  br label %for.body
694
695for.body:                                         ; preds = %for.inc, %for.body.preheader
696  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
697  %sum.010 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ]   ; running sum
698  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
699  %0 = load float, float* %arrayidx, align 4     ; %0 = a[i]
700  %cmp1 = fcmp ogt float %0, 1.000000e+00        ; compare carries no fast-math flags
701  br i1 %cmp1, label %if.then, label %if.else
702
703if.then:                                          ; preds = %for.body
704  %add = fadd fast float %0, %sum.010            ; a[i] > 1.0: sum + a[i]
705  br label %for.inc
706
707if.else:                                          ; preds = %for.body
708  %cmp8 = fcmp olt float %0, 3.000000e+00        ; compare carries no fast-math flags
709  br i1 %cmp8, label %if.then10, label %for.inc  ; neither branch taken: sum passes through unchanged
710
711if.then10:                                        ; preds = %if.else
712  %sub = fsub fast float %sum.010, %0            ; a[i] < 3.0: sum - a[i]
713  br label %for.inc
714
715for.inc:                                          ; preds = %if.then, %if.then10, %if.else
716  %sum.1 = phi float [ %add, %if.then ], [ %sub, %if.then10 ], [ %sum.010, %if.else ]   ; merges add / sub / unchanged
717  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
718  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
719  br i1 %exitcond, label %for.end, label %for.body
720
721for.end:                                          ; preds = %for.inc, %entry
722  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ]   ; 0.0 when the loop was skipped
723  ret float %sum.0.lcssa
724}
725
726; Float fadd + fmul patterns
727;   Check lack of vectorisation of reduction code with a pair of non-compatible
728;   instructions { fadd, fmul }.
729;
730; float fcmp_fadd_fmul(float *a, int n) {
731;   float sum=0.0;
732;   for (int i=0;i<n;i++) {
733;     if (a[i]>1.0)
734;       sum+=a[i];
735;     else if (a[i]<3.0)
736;       sum*=a[i];
737;   }
738;   return sum;
739; }
740
741; CHECK-LABEL: @fcmp_fadd_fmul(
742; CHECK-NOT: <4 x float>
743define float @fcmp_fadd_fmul(float* nocapture readonly %a, i32 %n) nounwind readonly {
744entry:
745  %cmp9 = icmp sgt i32 %n, 0                     ; guard: the loop is entered only when n > 0
746  br i1 %cmp9, label %for.body.preheader, label %for.end
747
748for.body.preheader:                               ; preds = %entry
749  %wide.trip.count = zext i32 %n to i64          ; widen n to the i64 induction type
750  br label %for.body
751
752for.body:                                         ; preds = %for.inc, %for.body.preheader
753  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
754  %sum.010 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ]   ; running value
755  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
756  %0 = load float, float* %arrayidx, align 4     ; %0 = a[i]
757  %cmp1 = fcmp ogt float %0, 1.000000e+00        ; compare carries no fast-math flags
758  br i1 %cmp1, label %if.then, label %if.else
759
760if.then:                                          ; preds = %for.body
761  %add = fadd fast float %0, %sum.010            ; a[i] > 1.0: sum + a[i]
762  br label %for.inc
763
764if.else:                                          ; preds = %for.body
765  %cmp8 = fcmp olt float %0, 3.000000e+00        ; compare carries no fast-math flags
766  br i1 %cmp8, label %if.then10, label %for.inc  ; neither branch taken: sum passes through unchanged
767
768if.then10:                                        ; preds = %if.else
769  %mul = fmul fast float %0, %sum.010            ; a[i] < 3.0: sum * a[i]
770  br label %for.inc
771
772for.inc:                                          ; preds = %if.then, %if.then10, %if.else
773  %sum.1 = phi float [ %add, %if.then ], [ %mul, %if.then10 ], [ %sum.010, %if.else ]   ; mixes fadd and fmul updates of one phi
774  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
775  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
776  br i1 %exitcond, label %for.end, label %for.body
777
778for.end:                                          ; preds = %for.inc, %entry
779  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ]   ; 0.0 when the loop was skipped
780  ret float %sum.0.lcssa
781}
782
783; Float fadd + store patterns
784;   Check lack of vectorisation of reduction code with a store back, given it
785;   has loop dependency on a[i].
786;
787; float fcmp_store_back(float a[], int LEN) {
788;     float sum = 0.0;
789;     for (int i = 0; i < LEN; i++) {
790;       sum += a[i];
791;       a[i] = sum;
792;     }
793;     return sum;
794; }
795
796; CHECK-LABEL: @fcmp_store_back(
797; CHECK-NOT: <4 x float>
798define float @fcmp_store_back(float* nocapture %a, i32 %LEN) nounwind {
799entry:
800  %cmp7 = icmp sgt i32 %LEN, 0                   ; guard: the loop is entered only when LEN > 0
801  br i1 %cmp7, label %for.body.preheader, label %for.end
802
803for.body.preheader:                               ; preds = %entry
804  %wide.trip.count = zext i32 %LEN to i64        ; widen LEN to the i64 induction type
805  br label %for.body
806
807for.body:                                         ; preds = %for.body, %for.body.preheader
808  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
809  %sum.08 = phi float [ 0.000000e+00, %for.body.preheader ], [ %add, %for.body ]   ; running sum
810  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
811  %0 = load float, float* %arrayidx, align 4     ; %0 = a[i]
812  %add = fadd fast float %0, %sum.08
813  store float %add, float* %arrayidx, align 4    ; a[i] = sum; this write is why the function is not marked readonly
814  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
815  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
816  br i1 %exitcond, label %for.end, label %for.body
817
818for.end:                                          ; preds = %for.body, %entry
819  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]   ; 0.0 when the loop was skipped
820  ret float %sum.0.lcssa
821}
822