• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
3; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S | FileCheck %s -check-prefix=VF2UF2
4; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -S | FileCheck %s -check-prefix=VF1UF4
5
6target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
7
8; Make sure a loop is vectorized correctly with fold-tail when the constant
9; trip-count is not a multiple of -force-vector-width and/or
10; -force-vector-interleave, but is a multiple of the internally computed MaxVF;
11; e.g., when all types are i32, which leads to MaxVF=1.
12
13define void @pr45679(i32* %A) optsize {
14; CHECK-LABEL: @pr45679(
15; CHECK-NEXT:  entry:
16; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
17; CHECK:       vector.ph:
18; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
19; CHECK:       vector.body:
20; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
21; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
22; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IND]], <i32 13, i32 13, i32 13, i32 13>
23; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
24; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
25; CHECK:       pred.store.if:
26; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
27; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP2]]
28; CHECK-NEXT:    store i32 13, i32* [[TMP3]], align 1
29; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
30; CHECK:       pred.store.continue:
31; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
32; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
33; CHECK:       pred.store.if1:
34; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 1
35; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP5]]
36; CHECK-NEXT:    store i32 13, i32* [[TMP6]], align 1
37; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
38; CHECK:       pred.store.continue2:
39; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
40; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
41; CHECK:       pred.store.if3:
42; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[INDEX]], 2
43; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP8]]
44; CHECK-NEXT:    store i32 13, i32* [[TMP9]], align 1
45; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
46; CHECK:       pred.store.continue4:
47; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
48; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
49; CHECK:       pred.store.if5:
50; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[INDEX]], 3
51; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP11]]
52; CHECK-NEXT:    store i32 13, i32* [[TMP12]], align 1
53; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
54; CHECK:       pred.store.continue6:
55; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
56; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
57; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
58; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
59; CHECK:       middle.block:
60; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
61; CHECK:       scalar.ph:
62; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
63; CHECK-NEXT:    br label [[LOOP:%.*]]
64; CHECK:       loop:
65; CHECK-NEXT:    [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ]
66; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[RIV]]
67; CHECK-NEXT:    store i32 13, i32* [[ARRAYIDX]], align 1
68; CHECK-NEXT:    [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
69; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14
70; CHECK-NEXT:    br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop !2
71; CHECK:       exit:
72; CHECK-NEXT:    ret void
73;
74; VF2UF2-LABEL: @pr45679(
75; VF2UF2-NEXT:  entry:
76; VF2UF2-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
77; VF2UF2:       vector.ph:
78; VF2UF2-NEXT:    br label [[VECTOR_BODY:%.*]]
79; VF2UF2:       vector.body:
80; VF2UF2-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
81; VF2UF2-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE7]] ]
82; VF2UF2-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
83; VF2UF2-NEXT:    [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], <i32 13, i32 13>
84; VF2UF2-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i32> [[STEP_ADD]], <i32 13, i32 13>
85; VF2UF2-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
86; VF2UF2-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
87; VF2UF2:       pred.store.if:
88; VF2UF2-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
89; VF2UF2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP3]]
90; VF2UF2-NEXT:    store i32 13, i32* [[TMP4]], align 1
91; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE]]
92; VF2UF2:       pred.store.continue:
93; VF2UF2-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
94; VF2UF2-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
95; VF2UF2:       pred.store.if2:
96; VF2UF2-NEXT:    [[TMP6:%.*]] = add i32 [[INDEX]], 1
97; VF2UF2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP6]]
98; VF2UF2-NEXT:    store i32 13, i32* [[TMP7]], align 1
99; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE3]]
100; VF2UF2:       pred.store.continue3:
101; VF2UF2-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
102; VF2UF2-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
103; VF2UF2:       pred.store.if4:
104; VF2UF2-NEXT:    [[TMP9:%.*]] = add i32 [[INDEX]], 2
105; VF2UF2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP9]]
106; VF2UF2-NEXT:    store i32 13, i32* [[TMP10]], align 1
107; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE5]]
108; VF2UF2:       pred.store.continue5:
109; VF2UF2-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
110; VF2UF2-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
111; VF2UF2:       pred.store.if6:
112; VF2UF2-NEXT:    [[TMP12:%.*]] = add i32 [[INDEX]], 3
113; VF2UF2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP12]]
114; VF2UF2-NEXT:    store i32 13, i32* [[TMP13]], align 1
115; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE7]]
116; VF2UF2:       pred.store.continue7:
117; VF2UF2-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
118; VF2UF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
119; VF2UF2-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
120; VF2UF2-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
121; VF2UF2:       middle.block:
122; VF2UF2-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
123; VF2UF2:       scalar.ph:
124; VF2UF2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
125; VF2UF2-NEXT:    br label [[LOOP:%.*]]
126; VF2UF2:       loop:
127; VF2UF2-NEXT:    [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ]
128; VF2UF2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[RIV]]
129; VF2UF2-NEXT:    store i32 13, i32* [[ARRAYIDX]], align 1
130; VF2UF2-NEXT:    [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
131; VF2UF2-NEXT:    [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14
132; VF2UF2-NEXT:    br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop !2
133; VF2UF2:       exit:
134; VF2UF2-NEXT:    ret void
135;
136; VF1UF4-LABEL: @pr45679(
137; VF1UF4-NEXT:  entry:
138; VF1UF4-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
139; VF1UF4:       vector.ph:
140; VF1UF4-NEXT:    br label [[VECTOR_BODY:%.*]]
141; VF1UF4:       vector.body:
142; VF1UF4-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE9:%.*]] ]
143; VF1UF4-NEXT:    [[INDUCTION:%.*]] = add i32 [[INDEX]], 0
144; VF1UF4-NEXT:    [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1
145; VF1UF4-NEXT:    [[INDUCTION2:%.*]] = add i32 [[INDEX]], 2
146; VF1UF4-NEXT:    [[INDUCTION3:%.*]] = add i32 [[INDEX]], 3
147; VF1UF4-NEXT:    [[TMP0:%.*]] = icmp ule i32 [[INDUCTION]], 13
148; VF1UF4-NEXT:    [[TMP1:%.*]] = icmp ule i32 [[INDUCTION1]], 13
149; VF1UF4-NEXT:    [[TMP2:%.*]] = icmp ule i32 [[INDUCTION2]], 13
150; VF1UF4-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[INDUCTION3]], 13
151; VF1UF4-NEXT:    br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
152; VF1UF4:       pred.store.if:
153; VF1UF4-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDUCTION]]
154; VF1UF4-NEXT:    store i32 13, i32* [[TMP4]], align 1
155; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE]]
156; VF1UF4:       pred.store.continue:
157; VF1UF4-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
158; VF1UF4:       pred.store.if4:
159; VF1UF4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION1]]
160; VF1UF4-NEXT:    store i32 13, i32* [[TMP5]], align 1
161; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE5]]
162; VF1UF4:       pred.store.continue5:
163; VF1UF4-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
164; VF1UF4:       pred.store.if6:
165; VF1UF4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION2]]
166; VF1UF4-NEXT:    store i32 13, i32* [[TMP6]], align 1
167; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE7]]
168; VF1UF4:       pred.store.continue7:
169; VF1UF4-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]]
170; VF1UF4:       pred.store.if8:
171; VF1UF4-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION3]]
172; VF1UF4-NEXT:    store i32 13, i32* [[TMP7]], align 1
173; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE9]]
174; VF1UF4:       pred.store.continue9:
175; VF1UF4-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
176; VF1UF4-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
177; VF1UF4-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]]
178; VF1UF4:       middle.block:
179; VF1UF4-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
180; VF1UF4:       scalar.ph:
181; VF1UF4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
182; VF1UF4-NEXT:    br label [[LOOP:%.*]]
183; VF1UF4:       loop:
184; VF1UF4-NEXT:    [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ]
185; VF1UF4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[RIV]]
186; VF1UF4-NEXT:    store i32 13, i32* [[ARRAYIDX]], align 1
187; VF1UF4-NEXT:    [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
188; VF1UF4-NEXT:    [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14
189; VF1UF4-NEXT:    br i1 [[COND]], label [[EXIT]], label [[LOOP]]
190; VF1UF4:       exit:
191; VF1UF4-NEXT:    ret void
192;
193entry:
194  br label %loop
195
196loop:
197  %riv = phi i32 [ 0, %entry ], [ %rivPlus1, %loop ] ; induction variable: 0 on entry, %rivPlus1 on the back-edge
198  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %riv ; address of A[riv]
199  store i32 13, i32* %arrayidx, align 1 ; A[riv] = 13
200  %rivPlus1 = add nuw nsw i32 %riv, 1
201  %cond = icmp eq i32 %rivPlus1, 14 ; constant trip count of 14, which is not a multiple of the 4 lanes (VF * UF) forced by each opt invocation above
202  br i1 %cond, label %exit, label %loop ; leave after the 14th store, otherwise take the back-edge
203
204exit:
205  ret void
206}
207