; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -S | FileCheck %s

; The function finds the smallest value from a float vector.
; The compare carries the instruction flag `fcmp nnan`; the select carries no
; fast-math flag and the function has no attribute group attached.
; The autogenerated assertions below expect the loop to be left in scalar form
; (no vector.ph / vector.body blocks appear in the expected output).
; NOTE(review): this comment used to say that vectorization is enabled by the
; `fcmp nnan` flag, which contradicts the assertions. Confirm whether the
; scalar output is intended or a missed vectorization.

define float @minloop(float* nocapture readonly %arg) {
; CHECK-LABEL: @minloop(
; CHECK-NEXT:  top:
; CHECK-NEXT:    [[T:%.*]] = load float, float* [[ARG:%.*]], align 4
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ 1, [[TOP:%.*]] ]
; CHECK-NEXT:    [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[T]], [[TOP]] ]
; CHECK-NEXT:    [[T3:%.*]] = getelementptr float, float* [[ARG]], i64 [[T1]]
; CHECK-NEXT:    [[T4:%.*]] = load float, float* [[T3]], align 4
; CHECK-NEXT:    [[T5:%.*]] = fcmp nnan olt float [[T2]], [[T4]]
; CHECK-NEXT:    [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]]
; CHECK-NEXT:    [[T7]] = add i64 [[T1]], 1
; CHECK-NEXT:    [[T8:%.*]] = icmp eq i64 [[T7]], 65537
; CHECK-NEXT:    br i1 [[T8]], label [[OUT:%.*]], label [[LOOP]]
; CHECK:       out:
; CHECK-NEXT:    [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ]
; CHECK-NEXT:    ret float [[T6_LCSSA]]
;
top:
  ; Seed the running minimum with the first element of %arg.
  %t = load float, float* %arg
  br label %loop

loop:                                             ; preds = %loop, %top
  ; %t1 is the element index (starts at 1), %t2 is the running minimum.
  %t1 = phi i64 [ %t7, %loop ], [ 1, %top ]
  %t2 = phi float [ %t6, %loop ], [ %t, %top ]
  %t3 = getelementptr float, float* %arg, i64 %t1
  %t4 = load float, float* %t3, align 4
  ; nnan is present on the compare only; the select below is unflagged.
  %t5 = fcmp nnan olt float %t2, %t4
  ; Keep %t2 when it compares less than the loaded element, else take %t4.
  %t6 = select i1 %t5, float %t2, float %t4
  %t7 = add i64 %t1, 1
  ; Leave the loop once the next index would be 65537 (last index read: 65536).
  %t8 = icmp eq i64 %t7, 65537
  br i1 %t8, label %out, label %loop

out:                                              ; preds = %loop
  ret float %t6
}

; Check if vectorization is still enabled by function attribute.
; The fcmp here has no per-instruction flag; the function instead references
; attribute group #0 ("no-nans-fp-math"="true"). The assertions expect a
; vector.body working on <4 x float>, a shuffle-based min reduction in
; middle.block, and the original scalar loop kept as the remainder loop.

define float @minloopattr(float* nocapture readonly %arg) #0 {
; CHECK-LABEL: @minloopattr(
; CHECK-NEXT:  top:
; CHECK-NEXT:    [[T:%.*]] = load float, float* [[ARG:%.*]], align 4
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[T]], i32 0
; CHECK-NEXT:    [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x float> [[MINMAX_IDENT_SPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, float* [[ARG]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = fcmp olt <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP4]], <4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; CHECK:       middle.block:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <4 x float> [[TMP5]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP5]], <4 x float> [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP2:%.*]] = fcmp fast olt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT3:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 65536, 65536
; CHECK-NEXT:    br i1 [[CMP_N]], label [[OUT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 65537, [[MIDDLE_BLOCK]] ], [ 1, [[TOP:%.*]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[T]], [[TOP]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[T3:%.*]] = getelementptr float, float* [[ARG]], i64 [[T1]]
; CHECK-NEXT:    [[T4:%.*]] = load float, float* [[T3]], align 4
; CHECK-NEXT:    [[T5:%.*]] = fcmp olt float [[T2]], [[T4]]
; CHECK-NEXT:    [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]]
; CHECK-NEXT:    [[T7]] = add i64 [[T1]], 1
; CHECK-NEXT:    [[T8:%.*]] = icmp eq i64 [[T7]], 65537
; CHECK-NEXT:    br i1 [[T8]], label [[OUT]], label [[LOOP]], !llvm.loop !2
; CHECK:       out:
; CHECK-NEXT:    [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret float [[T6_LCSSA]]
;
top:
  ; Seed the running minimum with the first element of %arg.
  %t = load float, float* %arg
  br label %loop

loop:                                             ; preds = %loop, %top
  ; %t1 is the element index (starts at 1), %t2 is the running minimum.
  %t1 = phi i64 [ %t7, %loop ], [ 1, %top ]
  %t2 = phi float [ %t6, %loop ], [ %t, %top ]
  %t3 = getelementptr float, float* %arg, i64 %t1
  %t4 = load float, float* %t3, align 4
  ; No fast-math flag on the compare; only the function attribute applies.
  %t5 = fcmp olt float %t2, %t4
  ; Keep %t2 when it compares less than the loaded element, else take %t4.
  %t6 = select i1 %t5, float %t2, float %t4
  %t7 = add i64 %t1, 1
  ; Leave the loop once the next index would be 65537 (last index read: 65536).
  %t8 = icmp eq i64 %t7, 65537
  br i1 %t8, label %out, label %loop

out:                                              ; preds = %loop
  ret float %t6
}

; Check if vectorization is prevented without the flag or attribute.
; Same loop as above, but the fcmp has no fast-math flag and the function has
; no attribute group. The assertions expect the scalar loop to be unchanged.

define float @minloopnovec(float* nocapture readonly %arg) {
; CHECK-LABEL: @minloopnovec(
; CHECK-NEXT:  top:
; CHECK-NEXT:    [[T:%.*]] = load float, float* [[ARG:%.*]], align 4
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ 1, [[TOP:%.*]] ]
; CHECK-NEXT:    [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[T]], [[TOP]] ]
; CHECK-NEXT:    [[T3:%.*]] = getelementptr float, float* [[ARG]], i64 [[T1]]
; CHECK-NEXT:    [[T4:%.*]] = load float, float* [[T3]], align 4
; CHECK-NEXT:    [[T5:%.*]] = fcmp olt float [[T2]], [[T4]]
; CHECK-NEXT:    [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]]
; CHECK-NEXT:    [[T7]] = add i64 [[T1]], 1
; CHECK-NEXT:    [[T8:%.*]] = icmp eq i64 [[T7]], 65537
; CHECK-NEXT:    br i1 [[T8]], label [[OUT:%.*]], label [[LOOP]]
; CHECK:       out:
; CHECK-NEXT:    [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ]
; CHECK-NEXT:    ret float [[T6_LCSSA]]
;
top:
  ; Seed the running minimum with the first element of %arg.
  %t = load float, float* %arg
  br label %loop

loop:                                             ; preds = %loop, %top
  ; %t1 is the element index (starts at 1), %t2 is the running minimum.
  %t1 = phi i64 [ %t7, %loop ], [ 1, %top ]
  %t2 = phi float [ %t6, %loop ], [ %t, %top ]
  %t3 = getelementptr float, float* %arg, i64 %t1
  %t4 = load float, float* %t3, align 4
  ; Plain ordered compare: no flag here and no attribute on the function.
  %t5 = fcmp olt float %t2, %t4
  ; Keep %t2 when it compares less than the loaded element, else take %t4.
  %t6 = select i1 %t5, float %t2, float %t4
  %t7 = add i64 %t1, 1
  ; Leave the loop once the next index would be 65537 (last index read: 65536).
  %t8 = icmp eq i64 %t7, 65537
  br i1 %t8, label %out, label %loop

out:                                              ; preds = %loop
  ret float %t6
}

; Attribute group referenced by @minloopattr.
attributes #0 = { "no-nans-fp-math"="true" }