• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: opt -loop-vectorize -force-vector-width=2 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_2
2; RUN: opt -loop-vectorize -force-vector-width=4 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_4
3; RUN: opt -loop-vectorize -force-vector-width=8 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_8
4; RUN: opt -loop-vectorize -force-vector-width=16 -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=VF_16
5; REQUIRES: asserts
6
; Module-wide target description: little-endian data layout with 32-bit
; pointers, and a Thumb v8.1-M Mainline bare-metal EABI triple.
7target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
8target triple = "thumbv8.1m.main-none-eabi"
9
10; Factor 2: each loop below accesses both members of a two-element struct per iteration.
11
; Two-member i8 struct. Each iteration reads both members of %data[%i]
; (the loaded values are never used) and then writes zero to both members.
; The directives inside the function pin the cost-model debug output for
; the four memory operations at forced vector widths 2, 4, 8 and 16. In each
; expected group the first load and the last store carry a cost, and the
; other member accesses are listed at 0.
12%i8.2 = type {i8, i8}
13define void @i8_factor_2(%i8.2* %data, i64 %n) #0 {
14entry:
15  br label %for.body
16
17; VF_2-LABEL:  Checking a loop in "i8_factor_2"
18; VF_2:          Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
19; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
20; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp0, align 1
21; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store i8 0, i8* %tmp1, align 1
22; VF_4-LABEL:  Checking a loop in "i8_factor_2"
23; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
24; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
25; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp0, align 1
26; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i8 0, i8* %tmp1, align 1
27; VF_8-LABEL:  Checking a loop in "i8_factor_2"
28; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
29; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
30; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1
31; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1
32; VF_16-LABEL: Checking a loop in "i8_factor_2"
33; VF_16:         Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i8, i8* %tmp0, align 1
34; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp1, align 1
35; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1
36; VF_16-NEXT:    Found an estimated cost of 4 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1
37for.body:
38  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of member 0 and member 1 of %data[%i].
39  %tmp0 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 0
40  %tmp1 = getelementptr inbounds %i8.2, %i8.2* %data, i64 %i, i32 1
  ; The loaded values are not used anywhere in this function.
41  %tmp2 = load i8, i8* %tmp0, align 1
42  %tmp3 = load i8, i8* %tmp1, align 1
43  store i8 0, i8* %tmp0, align 1
44  store i8 0, i8* %tmp1, align 1
  ; The body always runs once; it repeats while the incremented index is
  ; less than %n under a signed comparison.
45  %i.next = add nuw nsw i64 %i, 1
46  %cond = icmp slt i64 %i.next, %n
47  br i1 %cond, label %for.body, label %for.end
48
49for.end:
50  ret void
51}
52
; Two-member i16 struct. Each iteration reads both members of %data[%i]
; (the loaded values are never used) and then writes zero to both members.
; The directives inside the function pin the cost-model debug output for
; the four memory operations at forced vector widths 2, 4, 8 and 16. In each
; expected group the first load and the last store carry a cost, and the
; other member accesses are listed at 0.
53%i16.2 = type {i16, i16}
54define void @i16_factor_2(%i16.2* %data, i64 %n) #0 {
55entry:
56  br label %for.body
57
58; VF_2-LABEL:  Checking a loop in "i16_factor_2"
59; VF_2:          Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
60; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
61; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp0, align 2
62; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store i16 0, i16* %tmp1, align 2
63; VF_4-LABEL:  Checking a loop in "i16_factor_2"
64; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
65; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
66; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2
67; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2
68; VF_8-LABEL:  Checking a loop in "i16_factor_2"
69; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
70; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
71; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2
72; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2
73; VF_16-LABEL: Checking a loop in "i16_factor_2"
74; VF_16:         Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i16, i16* %tmp0, align 2
75; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp1, align 2
76; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2
77; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2
78for.body:
79  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of member 0 and member 1 of %data[%i].
80  %tmp0 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 0
81  %tmp1 = getelementptr inbounds %i16.2, %i16.2* %data, i64 %i, i32 1
  ; The loaded values are not used anywhere in this function.
82  %tmp2 = load i16, i16* %tmp0, align 2
83  %tmp3 = load i16, i16* %tmp1, align 2
84  store i16 0, i16* %tmp0, align 2
85  store i16 0, i16* %tmp1, align 2
  ; The body always runs once; it repeats while the incremented index is
  ; less than %n under a signed comparison.
86  %i.next = add nuw nsw i64 %i, 1
87  %cond = icmp slt i64 %i.next, %n
88  br i1 %cond, label %for.body, label %for.end
89
90for.end:
91  ret void
92}
93
; Two-member i32 struct. Each iteration reads both members of %data[%i]
; (the loaded values are never used) and then writes zero to both members.
; The directives inside the function pin the cost-model debug output for
; the four memory operations at forced vector widths 2, 4, 8 and 16. In each
; expected group the first load and the last store carry a cost, and the
; other member accesses are listed at 0.
94%i32.2 = type {i32, i32}
95define void @i32_factor_2(%i32.2* %data, i64 %n) #0 {
96entry:
97  br label %for.body
98
99; VF_2-LABEL:  Checking a loop in "i32_factor_2"
100; VF_2:          Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
101; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
102; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4
103; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4
104; VF_4-LABEL:  Checking a loop in "i32_factor_2"
105; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
106; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
107; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4
108; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4
109; VF_8-LABEL:  Checking a loop in "i32_factor_2"
110; VF_8:          Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
111; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
112; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4
113; VF_8-NEXT:     Found an estimated cost of 8 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4
114; VF_16-LABEL: Checking a loop in "i32_factor_2"
115; VF_16:         Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i32, i32* %tmp0, align 4
116; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp1, align 4
117; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4
118; VF_16-NEXT:    Found an estimated cost of 16 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4
119for.body:
120  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of member 0 and member 1 of %data[%i].
121  %tmp0 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 0
122  %tmp1 = getelementptr inbounds %i32.2, %i32.2* %data, i64 %i, i32 1
  ; The loaded values are not used anywhere in this function.
123  %tmp2 = load i32, i32* %tmp0, align 4
124  %tmp3 = load i32, i32* %tmp1, align 4
125  store i32 0, i32* %tmp0, align 4
126  store i32 0, i32* %tmp1, align 4
  ; The body always runs once; it repeats while the incremented index is
  ; less than %n under a signed comparison.
127  %i.next = add nuw nsw i64 %i, 1
128  %cond = icmp slt i64 %i.next, %n
129  br i1 %cond, label %for.body, label %for.end
130
131for.end:
132  ret void
133}
134
; Two-member i64 struct. Each iteration reads both members of %data[%i]
; (the loaded values are never used) and then writes zero to both members.
; The directives inside the function pin the cost-model debug output for
; the four memory operations at forced vector widths 2, 4, 8 and 16. In each
; expected group the first load and the last store carry a cost, and the
; other member accesses are listed at 0. The expected costs here grow
; steeply with the vector width (24 at width 2 up to 1088 at width 16 for
; the load).
135%i64.2 = type {i64, i64}
136define void @i64_factor_2(%i64.2* %data, i64 %n) #0 {
137entry:
138  br label %for.body
139
140; VF_2-LABEL:  Checking a loop in "i64_factor_2"
141; VF_2:          Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
142; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
143; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8
144; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8
145; VF_4-LABEL:  Checking a loop in "i64_factor_2"
146; VF_4:          Found an estimated cost of 80 for VF 4 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
147; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
148; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp0, align 8
149; VF_4-NEXT:     Found an estimated cost of 48 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8
150; VF_8-LABEL:  Checking a loop in "i64_factor_2"
151; VF_8:          Found an estimated cost of 288 for VF 8 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
152; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
153; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8
154; VF_8-NEXT:     Found an estimated cost of 160 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8
155; VF_16-LABEL: Checking a loop in "i64_factor_2"
156; VF_16:         Found an estimated cost of 1088 for VF 16 For instruction: %tmp2 = load i64, i64* %tmp0, align 8
157; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, i64* %tmp1, align 8
158; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp0, align 8
159; VF_16-NEXT:    Found an estimated cost of 576 for VF 16 For instruction: store i64 0, i64* %tmp1, align 8
160for.body:
161  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of member 0 and member 1 of %data[%i].
162  %tmp0 = getelementptr inbounds %i64.2, %i64.2* %data, i64 %i, i32 0
163  %tmp1 = getelementptr inbounds %i64.2, %i64.2* %data, i64 %i, i32 1
  ; The loaded values are not used anywhere in this function.
164  %tmp2 = load i64, i64* %tmp0, align 8
165  %tmp3 = load i64, i64* %tmp1, align 8
166  store i64 0, i64* %tmp0, align 8
167  store i64 0, i64* %tmp1, align 8
  ; The body always runs once; it repeats while the incremented index is
  ; less than %n under a signed comparison.
168  %i.next = add nuw nsw i64 %i, 1
169  %cond = icmp slt i64 %i.next, %n
170  br i1 %cond, label %for.body, label %for.end
171
172for.end:
173  ret void
174}
175
; Two-member half struct. Each iteration reads both members of %data[%i]
; (the loaded values are never used) and then writes zero to both members.
; The directives inside the function pin the cost-model debug output for
; the four memory operations at forced vector widths 2, 4, 8 and 16. In each
; expected group the first load and the last store carry a cost, and the
; other member accesses are listed at 0.
176%f16.2 = type {half, half}
177define void @f16_factor_2(%f16.2* %data, i64 %n) #0 {
178entry:
179  br label %for.body
180
181; VF_2-LABEL:  Checking a loop in "f16_factor_2"
182; VF_2:          Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load half, half* %tmp0, align 2
183; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load half, half* %tmp1, align 2
184; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp0, align 2
185; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store half 0xH0000, half* %tmp1, align 2
186; VF_4-LABEL:  Checking a loop in "f16_factor_2"
187; VF_4:          Found an estimated cost of 72 for VF 4 For instruction: %tmp2 = load half, half* %tmp0, align 2
188; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, half* %tmp1, align 2
189; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2
190; VF_4-NEXT:     Found an estimated cost of 40 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2
191; VF_8-LABEL:  Checking a loop in "f16_factor_2"
192; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2
193; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, half* %tmp1, align 2
194; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2
195; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2
196; VF_16-LABEL: Checking a loop in "f16_factor_2"
197; VF_16:         Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load half, half* %tmp0, align 2
198; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load half, half* %tmp1, align 2
199; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp0, align 2
200; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store half 0xH0000, half* %tmp1, align 2
201for.body:
202  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of member 0 and member 1 of %data[%i].
203  %tmp0 = getelementptr inbounds %f16.2, %f16.2* %data, i64 %i, i32 0
204  %tmp1 = getelementptr inbounds %f16.2, %f16.2* %data, i64 %i, i32 1
  ; The loaded values are not used anywhere in this function.
205  %tmp2 = load half, half* %tmp0, align 2
206  %tmp3 = load half, half* %tmp1, align 2
  ; Written as 0.0 here; the expected output above spells the same
  ; constant as 0xH0000.
207  store half 0.0, half* %tmp0, align 2
208  store half 0.0, half* %tmp1, align 2
  ; The body always runs once; it repeats while the incremented index is
  ; less than %n under a signed comparison.
209  %i.next = add nuw nsw i64 %i, 1
210  %cond = icmp slt i64 %i.next, %n
211  br i1 %cond, label %for.body, label %for.end
212
213for.end:
214  ret void
215}
216
; Two-member float struct. Each iteration reads both members of %data[%i]
; (the loaded values are never used) and then writes zero to both members.
; The directives inside the function pin the cost-model debug output for
; the four memory operations at forced vector widths 2, 4, 8 and 16. In each
; expected group the first load and the last store carry a cost, and the
; other member accesses are listed at 0.
217%f32.2 = type {float, float}
218define void @f32_factor_2(%f32.2* %data, i64 %n) #0 {
219entry:
220  br label %for.body
221
222; VF_2-LABEL:  Checking a loop in "f32_factor_2"
223; VF_2:          Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load float, float* %tmp0, align 4
224; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load float, float* %tmp1, align 4
225; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp0, align 4
226; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4
227; VF_4-LABEL:  Checking a loop in "f32_factor_2"
228; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load float, float* %tmp0, align 4
229; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load float, float* %tmp1, align 4
230; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp0, align 4
231; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store float 0.000000e+00, float* %tmp1, align 4
232; VF_8-LABEL:  Checking a loop in "f32_factor_2"
233; VF_8:          Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load float, float* %tmp0, align 4
234; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load float, float* %tmp1, align 4
235; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp0, align 4
236; VF_8-NEXT:     Found an estimated cost of 8 for VF 8 For instruction: store float 0.000000e+00, float* %tmp1, align 4
237; VF_16-LABEL: Checking a loop in "f32_factor_2"
238; VF_16:         Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load float, float* %tmp0, align 4
239; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load float, float* %tmp1, align 4
240; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp0, align 4
241; VF_16-NEXT:    Found an estimated cost of 16 for VF 16 For instruction: store float 0.000000e+00, float* %tmp1, align 4
242for.body:
243  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of member 0 and member 1 of %data[%i].
244  %tmp0 = getelementptr inbounds %f32.2, %f32.2* %data, i64 %i, i32 0
245  %tmp1 = getelementptr inbounds %f32.2, %f32.2* %data, i64 %i, i32 1
  ; The loaded values are not used anywhere in this function.
246  %tmp2 = load float, float* %tmp0, align 4
247  %tmp3 = load float, float* %tmp1, align 4
  ; Written as 0.0 here; the expected output above spells the same
  ; constant as 0.000000e+00.
248  store float 0.0, float* %tmp0, align 4
249  store float 0.0, float* %tmp1, align 4
  ; The body always runs once; it repeats while the incremented index is
  ; less than %n under a signed comparison.
250  %i.next = add nuw nsw i64 %i, 1
251  %cond = icmp slt i64 %i.next, %n
252  br i1 %cond, label %for.body, label %for.end
253
254for.end:
255  ret void
256}
257
; Two-member double struct. Each iteration reads both members of %data[%i]
; (the loaded values are never used) and then writes zero to both members.
; The directives inside the function pin the cost-model debug output for
; the four memory operations at forced vector widths 2, 4, 8 and 16. In each
; expected group the first load and the last store carry a cost, and the
; other member accesses are listed at 0. The expected costs here grow
; steeply with the vector width (20 at width 2 up to 1056 at width 16 for
; the load).
258%f64.2 = type {double, double}
259define void @f64_factor_2(%f64.2* %data, i64 %n) #0 {
260entry:
261  br label %for.body
262
263; VF_2-LABEL:  Checking a loop in "f64_factor_2"
264; VF_2:          Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load double, double* %tmp0, align 8
265; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load double, double* %tmp1, align 8
266; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp0, align 8
267; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store double 0.000000e+00, double* %tmp1, align 8
268; VF_4-LABEL:  Checking a loop in "f64_factor_2"
269; VF_4:          Found an estimated cost of 72 for VF 4 For instruction: %tmp2 = load double, double* %tmp0, align 8
270; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load double, double* %tmp1, align 8
271; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp0, align 8
272; VF_4-NEXT:     Found an estimated cost of 40 for VF 4 For instruction: store double 0.000000e+00, double* %tmp1, align 8
273; VF_8-LABEL:  Checking a loop in "f64_factor_2"
274; VF_8:          Found an estimated cost of 272 for VF 8 For instruction: %tmp2 = load double, double* %tmp0, align 8
275; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load double, double* %tmp1, align 8
276; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp0, align 8
277; VF_8-NEXT:     Found an estimated cost of 144 for VF 8 For instruction: store double 0.000000e+00, double* %tmp1, align 8
278; VF_16-LABEL: Checking a loop in "f64_factor_2"
279; VF_16:         Found an estimated cost of 1056 for VF 16 For instruction: %tmp2 = load double, double* %tmp0, align 8
280; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load double, double* %tmp1, align 8
281; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp0, align 8
282; VF_16-NEXT:    Found an estimated cost of 544 for VF 16 For instruction: store double 0.000000e+00, double* %tmp1, align 8
283for.body:
284  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of member 0 and member 1 of %data[%i].
285  %tmp0 = getelementptr inbounds %f64.2, %f64.2* %data, i64 %i, i32 0
286  %tmp1 = getelementptr inbounds %f64.2, %f64.2* %data, i64 %i, i32 1
  ; The loaded values are not used anywhere in this function.
287  %tmp2 = load double, double* %tmp0, align 8
288  %tmp3 = load double, double* %tmp1, align 8
  ; Written as 0.0 here; the expected output above spells the same
  ; constant as 0.000000e+00.
289  store double 0.0, double* %tmp0, align 8
290  store double 0.0, double* %tmp1, align 8
  ; The body always runs once; it repeats while the incremented index is
  ; less than %n under a signed comparison.
291  %i.next = add nuw nsw i64 %i, 1
292  %cond = icmp slt i64 %i.next, %n
293  br i1 %cond, label %for.body, label %for.end
294
295for.end:
296  ret void
297}
298
299
300
301; Factor 3: each loop below accesses all three members of a three-element struct per iteration.
302
; Three-member i8 struct. Each iteration reads all three members of
; %data[%i] (the loaded values are never used) and then writes zero to all
; three. The directives inside the function pin the cost-model debug output
; for the six memory operations at forced vector widths 2, 4, 8 and 16. In
; each expected group the first load and the last store carry a cost, and
; the other member accesses are listed at 0.
303%i8.3 = type {i8, i8, i8}
304define void @i8_factor_3(%i8.3* %data, i64 %n) #0 {
305entry:
306  br label %for.body
307
308; VF_2-LABEL:  Checking a loop in "i8_factor_3"
309; VF_2:          Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load i8, i8* %tmp0, align 1
310; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i8, i8* %tmp1, align 1
311; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i8, i8* %tmp2, align 1
312; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp0, align 1
313; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp1, align 1
314; VF_2-NEXT:     Found an estimated cost of 18 for VF 2 For instruction: store i8 0, i8* %tmp2, align 1
315; VF_4-LABEL:  Checking a loop in "i8_factor_3"
316; VF_4:          Found an estimated cost of 108 for VF 4 For instruction: %tmp3 = load i8, i8* %tmp0, align 1
317; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i8, i8* %tmp1, align 1
318; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i8, i8* %tmp2, align 1
319; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp0, align 1
320; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp1, align 1
321; VF_4-NEXT:     Found an estimated cost of 60 for VF 4 For instruction: store i8 0, i8* %tmp2, align 1
322; VF_8-LABEL:  Checking a loop in "i8_factor_3"
323; VF_8:          Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp0, align 1
324; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i8, i8* %tmp1, align 1
325; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, i8* %tmp2, align 1
326; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1
327; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1
328; VF_8-NEXT:     Found an estimated cost of 216 for VF 8 For instruction: store i8 0, i8* %tmp2, align 1
329; VF_16-LABEL: Checking a loop in "i8_factor_3"
330; VF_16:         Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp0, align 1
331; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i8, i8* %tmp1, align 1
332; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, i8* %tmp2, align 1
333; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1
334; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1
335; VF_16-NEXT:    Found an estimated cost of 816 for VF 16 For instruction: store i8 0, i8* %tmp2, align 1
336for.body:
337  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of members 0, 1 and 2 of %data[%i].
338  %tmp0 = getelementptr inbounds %i8.3, %i8.3* %data, i64 %i, i32 0
339  %tmp1 = getelementptr inbounds %i8.3, %i8.3* %data, i64 %i, i32 1
340  %tmp2 = getelementptr inbounds %i8.3, %i8.3* %data, i64 %i, i32 2
  ; The loaded values are not used anywhere in this function.
341  %tmp3 = load i8, i8* %tmp0, align 1
342  %tmp4 = load i8, i8* %tmp1, align 1
343  %tmp5 = load i8, i8* %tmp2, align 1
344  store i8 0, i8* %tmp0, align 1
345  store i8 0, i8* %tmp1, align 1
346  store i8 0, i8* %tmp2, align 1
  ; The body always runs once; it repeats while the incremented index is
  ; less than %n under a signed comparison.
347  %i.next = add nuw nsw i64 %i, 1
348  %cond = icmp slt i64 %i.next, %n
349  br i1 %cond, label %for.body, label %for.end
350
351for.end:
352  ret void
353}
354
; Three-member i16 struct. Each iteration reads all three members of
; %data[%i] (the loaded values are never used) and then writes zero to all
; three. The directives inside the function pin the cost-model debug output
; for the six memory operations at forced vector widths 2, 4, 8 and 16. In
; each expected group the first load and the last store carry a cost, and
; the other member accesses are listed at 0.
355%i16.3 = type {i16, i16, i16}
356define void @i16_factor_3(%i16.3* %data, i64 %n) #0 {
357entry:
358  br label %for.body
359
360; VF_2-LABEL:  Checking a loop in "i16_factor_3"
361; VF_2:          Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load i16, i16* %tmp0, align 2
362; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i16, i16* %tmp1, align 2
363; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i16, i16* %tmp2, align 2
364; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp0, align 2
365; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp1, align 2
366; VF_2-NEXT:     Found an estimated cost of 18 for VF 2 For instruction: store i16 0, i16* %tmp2, align 2
367; VF_4-LABEL:  Checking a loop in "i16_factor_3"
368; VF_4:          Found an estimated cost of 108 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp0, align 2
369; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i16, i16* %tmp1, align 2
370; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, i16* %tmp2, align 2
371; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2
372; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2
373; VF_4-NEXT:     Found an estimated cost of 60 for VF 4 For instruction: store i16 0, i16* %tmp2, align 2
374; VF_8-LABEL:  Checking a loop in "i16_factor_3"
375; VF_8:          Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp0, align 2
376; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i16, i16* %tmp1, align 2
377; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, i16* %tmp2, align 2
378; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2
379; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2
380; VF_8-NEXT:     Found an estimated cost of 216 for VF 8 For instruction: store i16 0, i16* %tmp2, align 2
381; VF_16-LABEL: Checking a loop in "i16_factor_3"
382; VF_16:         Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp0, align 2
383; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i16, i16* %tmp1, align 2
384; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, i16* %tmp2, align 2
385; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2
386; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2
387; VF_16-NEXT:    Found an estimated cost of 816 for VF 16 For instruction: store i16 0, i16* %tmp2, align 2
388for.body:
389  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of members 0, 1 and 2 of %data[%i].
390  %tmp0 = getelementptr inbounds %i16.3, %i16.3* %data, i64 %i, i32 0
391  %tmp1 = getelementptr inbounds %i16.3, %i16.3* %data, i64 %i, i32 1
392  %tmp2 = getelementptr inbounds %i16.3, %i16.3* %data, i64 %i, i32 2
  ; The loaded values are not used anywhere in this function.
393  %tmp3 = load i16, i16* %tmp0, align 2
394  %tmp4 = load i16, i16* %tmp1, align 2
395  %tmp5 = load i16, i16* %tmp2, align 2
396  store i16 0, i16* %tmp0, align 2
397  store i16 0, i16* %tmp1, align 2
398  store i16 0, i16* %tmp2, align 2
  ; The body always runs once; it repeats while the incremented index is
  ; less than %n under a signed comparison.
399  %i.next = add nuw nsw i64 %i, 1
400  %cond = icmp slt i64 %i.next, %n
401  br i1 %cond, label %for.body, label %for.end
402
403for.end:
404  ret void
405}
406
; Three-member i32 struct. Each iteration reads all three members of
; %data[%i] (the loaded values are never used) and then writes zero to all
; three. The directives inside the function pin the cost-model debug output
; for the six memory operations at forced vector widths 2, 4, 8 and 16. In
; each expected group the first load and the last store carry a cost, and
; the other member accesses are listed at 0. Width 4 is the one case here
; where the expected load and store costs are equal (24 each).
407%i32.3 = type {i32, i32, i32}
408define void @i32_factor_3(%i32.3* %data, i64 %n) #0 {
409entry:
410  br label %for.body
411
412; VF_2-LABEL:  Checking a loop in "i32_factor_3"
413; VF_2:          Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp0, align 4
414; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i32, i32* %tmp1, align 4
415; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, i32* %tmp2, align 4
416; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4
417; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4
418; VF_2-NEXT:     Found an estimated cost of 18 for VF 2 For instruction: store i32 0, i32* %tmp2, align 4
419; VF_4-LABEL:  Checking a loop in "i32_factor_3"
420; VF_4:          Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp0, align 4
421; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i32, i32* %tmp1, align 4
422; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, i32* %tmp2, align 4
423; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4
424; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4
425; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store i32 0, i32* %tmp2, align 4
426; VF_8-LABEL:  Checking a loop in "i32_factor_3"
427; VF_8:          Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp0, align 4
428; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i32, i32* %tmp1, align 4
429; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, i32* %tmp2, align 4
430; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4
431; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4
432; VF_8-NEXT:     Found an estimated cost of 216 for VF 8 For instruction: store i32 0, i32* %tmp2, align 4
433; VF_16-LABEL: Checking a loop in "i32_factor_3"
434; VF_16:         Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp0, align 4
435; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i32, i32* %tmp1, align 4
436; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, i32* %tmp2, align 4
437; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4
438; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4
439; VF_16-NEXT:    Found an estimated cost of 816 for VF 16 For instruction: store i32 0, i32* %tmp2, align 4
440for.body:
441  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of members 0, 1 and 2 of %data[%i].
442  %tmp0 = getelementptr inbounds %i32.3, %i32.3* %data, i64 %i, i32 0
443  %tmp1 = getelementptr inbounds %i32.3, %i32.3* %data, i64 %i, i32 1
444  %tmp2 = getelementptr inbounds %i32.3, %i32.3* %data, i64 %i, i32 2
  ; The loaded values are not used anywhere in this function.
445  %tmp3 = load i32, i32* %tmp0, align 4
446  %tmp4 = load i32, i32* %tmp1, align 4
447  %tmp5 = load i32, i32* %tmp2, align 4
448  store i32 0, i32* %tmp0, align 4
449  store i32 0, i32* %tmp1, align 4
450  store i32 0, i32* %tmp2, align 4
  ; The body always runs once; it repeats while the incremented index is
  ; less than %n under a signed comparison.
451  %i.next = add nuw nsw i64 %i, 1
452  %cond = icmp slt i64 %i.next, %n
453  br i1 %cond, label %for.body, label %for.end
454
455for.end:
456  ret void
457}
458
459%i64.3 = type {i64, i64, i64}
; i64_factor_3: treats %data as an array of %i64.3 and, for each index %i,
; loads the three i64 fields of that element and then stores 0 to each of
; them. The check lines below pin the per-instruction cost printed by the loop
; vectorizer's debug output at the vector widths 2, 4, 8 and 16 forced by the
; RUN lines; at every width the expected cost is non-zero only on the first
; load and on the last store, and 0 on the other four memory instructions.
460define void @i64_factor_3(%i64.3* %data, i64 %n) #0 {
461entry:
462  br label %for.body
463
464; VF_2-LABEL:  Checking a loop in "i64_factor_3"
465; VF_2:          Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp0, align 8
466; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i64, i64* %tmp1, align 8
467; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i64, i64* %tmp2, align 8
468; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8
469; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8
470; VF_2-NEXT:     Found an estimated cost of 24 for VF 2 For instruction: store i64 0, i64* %tmp2, align 8
471; VF_4-LABEL:  Checking a loop in "i64_factor_3"
472; VF_4:          Found an estimated cost of 120 for VF 4 For instruction: %tmp3 = load i64, i64* %tmp0, align 8
473; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i64, i64* %tmp1, align 8
474; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i64, i64* %tmp2, align 8
475; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp0, align 8
476; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8
477; VF_4-NEXT:     Found an estimated cost of 72 for VF 4 For instruction: store i64 0, i64* %tmp2, align 8
478; VF_8-LABEL:  Checking a loop in "i64_factor_3"
479; VF_8:          Found an estimated cost of 432 for VF 8 For instruction: %tmp3 = load i64, i64* %tmp0, align 8
480; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i64, i64* %tmp1, align 8
481; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i64, i64* %tmp2, align 8
482; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8
483; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8
484; VF_8-NEXT:     Found an estimated cost of 240 for VF 8 For instruction: store i64 0, i64* %tmp2, align 8
485; VF_16-LABEL: Checking a loop in "i64_factor_3"
486; VF_16:         Found an estimated cost of 1632 for VF 16 For instruction: %tmp3 = load i64, i64* %tmp0, align 8
487; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i64, i64* %tmp1, align 8
488; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i64, i64* %tmp2, align 8
489; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp0, align 8
490; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp1, align 8
491; VF_16-NEXT:    Found an estimated cost of 864 for VF 16 For instruction: store i64 0, i64* %tmp2, align 8
492for.body:
  ; %i is the element index; it is 0 when the loop is entered from %entry.
493  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of fields 0, 1 and 2 of element %i.
494  %tmp0 = getelementptr inbounds %i64.3, %i64.3* %data, i64 %i, i32 0
495  %tmp1 = getelementptr inbounds %i64.3, %i64.3* %data, i64 %i, i32 1
496  %tmp2 = getelementptr inbounds %i64.3, %i64.3* %data, i64 %i, i32 2
  ; The loaded values are not used again in this function.
497  %tmp3 = load i64, i64* %tmp0, align 8
498  %tmp4 = load i64, i64* %tmp1, align 8
499  %tmp5 = load i64, i64* %tmp2, align 8
500  store i64 0, i64* %tmp0, align 8
501  store i64 0, i64* %tmp1, align 8
502  store i64 0, i64* %tmp2, align 8
  ; The exit test sits at the bottom, so the body runs at least once; the loop
  ; repeats while the next index is below %n (signed compare).
503  %i.next = add nuw nsw i64 %i, 1
504  %cond = icmp slt i64 %i.next, %n
505  br i1 %cond, label %for.body, label %for.end
506
507for.end:
508  ret void
509}
510
511%f16.3 = type {half, half, half}
; f16_factor_3: treats %data as an array of %f16.3 and, for each index %i,
; loads the three half fields of that element and then stores 0.0 to each of
; them. The check lines below pin the per-instruction cost printed by the loop
; vectorizer's debug output at the vector widths 2, 4, 8 and 16 forced by the
; RUN lines; at every width the expected cost is non-zero only on the first
; load and on the last store. The check lines expect the stored `half 0.0`
; constant to be printed as `half 0xH0000`.
512define void @f16_factor_3(%f16.3* %data, i64 %n) #0 {
513entry:
514  br label %for.body
515
516; VF_2-LABEL:  Checking a loop in "f16_factor_3"
517; VF_2:          Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load half, half* %tmp0, align 2
518; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load half, half* %tmp1, align 2
519; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load half, half* %tmp2, align 2
520; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp0, align 2
521; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp1, align 2
522; VF_2-NEXT:     Found an estimated cost of 18 for VF 2 For instruction: store half 0xH0000, half* %tmp2, align 2
523; VF_4-LABEL:  Checking a loop in "f16_factor_3"
524; VF_4:          Found an estimated cost of 108 for VF 4 For instruction: %tmp3 = load half, half* %tmp0, align 2
525; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load half, half* %tmp1, align 2
526; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load half, half* %tmp2, align 2
527; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2
528; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2
529; VF_4-NEXT:     Found an estimated cost of 60 for VF 4 For instruction: store half 0xH0000, half* %tmp2, align 2
530; VF_8-LABEL:  Checking a loop in "f16_factor_3"
531; VF_8:          Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load half, half* %tmp0, align 2
532; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load half, half* %tmp1, align 2
533; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, half* %tmp2, align 2
534; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2
535; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2
536; VF_8-NEXT:     Found an estimated cost of 216 for VF 8 For instruction: store half 0xH0000, half* %tmp2, align 2
537; VF_16-LABEL: Checking a loop in "f16_factor_3"
538; VF_16:         Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load half, half* %tmp0, align 2
539; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load half, half* %tmp1, align 2
540; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, half* %tmp2, align 2
541; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp0, align 2
542; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp1, align 2
543; VF_16-NEXT:    Found an estimated cost of 816 for VF 16 For instruction: store half 0xH0000, half* %tmp2, align 2
544for.body:
  ; %i is the element index; it is 0 when the loop is entered from %entry.
545  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of fields 0, 1 and 2 of element %i.
546  %tmp0 = getelementptr inbounds %f16.3, %f16.3* %data, i64 %i, i32 0
547  %tmp1 = getelementptr inbounds %f16.3, %f16.3* %data, i64 %i, i32 1
548  %tmp2 = getelementptr inbounds %f16.3, %f16.3* %data, i64 %i, i32 2
  ; The loaded values are not used again in this function.
549  %tmp3 = load half, half* %tmp0, align 2
550  %tmp4 = load half, half* %tmp1, align 2
551  %tmp5 = load half, half* %tmp2, align 2
552  store half 0.0, half* %tmp0, align 2
553  store half 0.0, half* %tmp1, align 2
554  store half 0.0, half* %tmp2, align 2
  ; The exit test sits at the bottom, so the body runs at least once; the loop
  ; repeats while the next index is below %n (signed compare).
555  %i.next = add nuw nsw i64 %i, 1
556  %cond = icmp slt i64 %i.next, %n
557  br i1 %cond, label %for.body, label %for.end
558
559for.end:
560  ret void
561}
562
563%f32.3 = type {float, float, float}
; f32_factor_3: treats %data as an array of %f32.3 and, for each index %i,
; loads the three float fields of that element and then stores 0.0 to each of
; them. The check lines below pin the per-instruction cost printed by the loop
; vectorizer's debug output at the vector widths 2, 4, 8 and 16 forced by the
; RUN lines; at every width the expected cost is non-zero only on the first
; load and on the last store. The check lines expect the stored `float 0.0`
; constant to be printed as `float 0.000000e+00`.
564define void @f32_factor_3(%f32.3* %data, i64 %n) #0 {
565entry:
566  br label %for.body
567
568; VF_2-LABEL:  Checking a loop in "f32_factor_3"
569; VF_2:          Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load float, float* %tmp0, align 4
570; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load float, float* %tmp1, align 4
571; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load float, float* %tmp2, align 4
572; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp0, align 4
573; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4
574; VF_2-NEXT:     Found an estimated cost of 18 for VF 2 For instruction: store float 0.000000e+00, float* %tmp2, align 4
575; VF_4-LABEL:  Checking a loop in "f32_factor_3"
576; VF_4:          Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load float, float* %tmp0, align 4
577; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load float, float* %tmp1, align 4
578; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, float* %tmp2, align 4
579; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp0, align 4
580; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp1, align 4
581; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store float 0.000000e+00, float* %tmp2, align 4
582; VF_8-LABEL:  Checking a loop in "f32_factor_3"
583; VF_8:          Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load float, float* %tmp0, align 4
584; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load float, float* %tmp1, align 4
585; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, float* %tmp2, align 4
586; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp0, align 4
587; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp1, align 4
588; VF_8-NEXT:     Found an estimated cost of 216 for VF 8 For instruction: store float 0.000000e+00, float* %tmp2, align 4
589; VF_16-LABEL: Checking a loop in "f32_factor_3"
590; VF_16:         Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load float, float* %tmp0, align 4
591; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load float, float* %tmp1, align 4
592; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, float* %tmp2, align 4
593; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp0, align 4
594; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp1, align 4
595; VF_16-NEXT:    Found an estimated cost of 816 for VF 16 For instruction: store float 0.000000e+00, float* %tmp2, align 4
596for.body:
  ; %i is the element index; it is 0 when the loop is entered from %entry.
597  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of fields 0, 1 and 2 of element %i.
598  %tmp0 = getelementptr inbounds %f32.3, %f32.3* %data, i64 %i, i32 0
599  %tmp1 = getelementptr inbounds %f32.3, %f32.3* %data, i64 %i, i32 1
600  %tmp2 = getelementptr inbounds %f32.3, %f32.3* %data, i64 %i, i32 2
  ; The loaded values are not used again in this function.
601  %tmp3 = load float, float* %tmp0, align 4
602  %tmp4 = load float, float* %tmp1, align 4
603  %tmp5 = load float, float* %tmp2, align 4
604  store float 0.0, float* %tmp0, align 4
605  store float 0.0, float* %tmp1, align 4
606  store float 0.0, float* %tmp2, align 4
  ; The exit test sits at the bottom, so the body runs at least once; the loop
  ; repeats while the next index is below %n (signed compare).
607  %i.next = add nuw nsw i64 %i, 1
608  %cond = icmp slt i64 %i.next, %n
609  br i1 %cond, label %for.body, label %for.end
610
611for.end:
612  ret void
613}
614
615%f64.3 = type {double, double, double}
; f64_factor_3: treats %data as an array of %f64.3 and, for each index %i,
; loads the three double fields of that element and then stores 0.0 to each
; of them. The check lines below pin the per-instruction cost printed by the
; loop vectorizer's debug output at the vector widths 2, 4, 8 and 16 forced by
; the RUN lines; at every width the expected cost is non-zero only on the
; first load and on the last store. The check lines expect the stored
; `double 0.0` constant to be printed as `double 0.000000e+00`.
616define void @f64_factor_3(%f64.3* %data, i64 %n) #0 {
617entry:
618  br label %for.body
619
620; VF_2-LABEL:  Checking a loop in "f64_factor_3"
621; VF_2:          Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load double, double* %tmp0, align 8
622; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load double, double* %tmp1, align 8
623; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load double, double* %tmp2, align 8
624; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp0, align 8
625; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp1, align 8
626; VF_2-NEXT:     Found an estimated cost of 18 for VF 2 For instruction: store double 0.000000e+00, double* %tmp2, align 8
627; VF_4-LABEL:  Checking a loop in "f64_factor_3"
628; VF_4:          Found an estimated cost of 108 for VF 4 For instruction: %tmp3 = load double, double* %tmp0, align 8
629; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load double, double* %tmp1, align 8
630; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load double, double* %tmp2, align 8
631; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp0, align 8
632; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp1, align 8
633; VF_4-NEXT:     Found an estimated cost of 60 for VF 4 For instruction: store double 0.000000e+00, double* %tmp2, align 8
634; VF_8-LABEL:  Checking a loop in "f64_factor_3"
635; VF_8:          Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load double, double* %tmp0, align 8
636; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load double, double* %tmp1, align 8
637; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load double, double* %tmp2, align 8
638; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp0, align 8
639; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp1, align 8
640; VF_8-NEXT:     Found an estimated cost of 216 for VF 8 For instruction: store double 0.000000e+00, double* %tmp2, align 8
641; VF_16-LABEL: Checking a loop in "f64_factor_3"
642; VF_16:         Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load double, double* %tmp0, align 8
643; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load double, double* %tmp1, align 8
644; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load double, double* %tmp2, align 8
645; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp0, align 8
646; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp1, align 8
647; VF_16-NEXT:    Found an estimated cost of 816 for VF 16 For instruction: store double 0.000000e+00, double* %tmp2, align 8
648for.body:
  ; %i is the element index; it is 0 when the loop is entered from %entry.
649  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of fields 0, 1 and 2 of element %i.
650  %tmp0 = getelementptr inbounds %f64.3, %f64.3* %data, i64 %i, i32 0
651  %tmp1 = getelementptr inbounds %f64.3, %f64.3* %data, i64 %i, i32 1
652  %tmp2 = getelementptr inbounds %f64.3, %f64.3* %data, i64 %i, i32 2
  ; The loaded values are not used again in this function.
653  %tmp3 = load double, double* %tmp0, align 8
654  %tmp4 = load double, double* %tmp1, align 8
655  %tmp5 = load double, double* %tmp2, align 8
656  store double 0.0, double* %tmp0, align 8
657  store double 0.0, double* %tmp1, align 8
658  store double 0.0, double* %tmp2, align 8
  ; The exit test sits at the bottom, so the body runs at least once; the loop
  ; repeats while the next index is below %n (signed compare).
659  %i.next = add nuw nsw i64 %i, 1
660  %cond = icmp slt i64 %i.next, %n
661  br i1 %cond, label %for.body, label %for.end
662
663for.end:
664  ret void
665}
666
667
668; Factor 4
669
670%i8.4 = type {i8, i8, i8, i8}
; i8_factor_4: treats %data as an array of %i8.4 and, for each index %i,
; loads the four i8 fields of that element and then stores 0 to each of them.
; The check lines below pin the per-instruction cost printed by the loop
; vectorizer's debug output at the vector widths 2, 4, 8 and 16 forced by the
; RUN lines; at every width the expected cost is non-zero only on the first
; load and on the last store, and 0 on the other six memory instructions.
671define void @i8_factor_4(%i8.4* %data, i64 %n) #0 {
672entry:
673  br label %for.body
674
675; VF_2-LABEL:  Checking a loop in "i8_factor_4"
676; VF_2:          Found an estimated cost of 40 for VF 2 For instruction: %tmp4 = load i8, i8* %tmp0, align 1
677; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i8, i8* %tmp1, align 1
678; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i8, i8* %tmp2, align 1
679; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i8, i8* %tmp3, align 1
680; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp0, align 1
681; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp1, align 1
682; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp2, align 1
683; VF_2-NEXT:     Found an estimated cost of 24 for VF 2 For instruction: store i8 0, i8* %tmp3, align 1
684; VF_4-LABEL: Checking a loop in "i8_factor_4"
685; VF_4:         Found an estimated cost of 144 for VF 4 For instruction: %tmp4 = load i8, i8* %tmp0, align 1
686; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i8, i8* %tmp1, align 1
687; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i8, i8* %tmp2, align 1
688; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i8, i8* %tmp3, align 1
689; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp0, align 1
690; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp1, align 1
691; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp2, align 1
692; VF_4-NEXT:    Found an estimated cost of 80 for VF 4 For instruction: store i8 0, i8* %tmp3, align 1
693; VF_8-LABEL:  Checking a loop in "i8_factor_4"
694; VF_8:          Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load i8, i8* %tmp0, align 1
695; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, i8* %tmp1, align 1
696; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i8, i8* %tmp2, align 1
697; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i8, i8* %tmp3, align 1
698; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1
699; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1
700; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp2, align 1
701; VF_8-NEXT:     Found an estimated cost of 288 for VF 8 For instruction: store i8 0, i8* %tmp3, align 1
702; VF_16-LABEL: Checking a loop in "i8_factor_4"
703; VF_16:         Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load i8, i8* %tmp0, align 1
704; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, i8* %tmp1, align 1
705; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i8, i8* %tmp2, align 1
706; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i8, i8* %tmp3, align 1
707; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1
708; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1
709; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp2, align 1
710; VF_16-NEXT:    Found an estimated cost of 1088 for VF 16 For instruction: store i8 0, i8* %tmp3, align 1
711for.body:
  ; %i is the element index; it is 0 when the loop is entered from %entry.
712  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of fields 0 through 3 of element %i.
713  %tmp0 = getelementptr inbounds %i8.4, %i8.4* %data, i64 %i, i32 0
714  %tmp1 = getelementptr inbounds %i8.4, %i8.4* %data, i64 %i, i32 1
715  %tmp2 = getelementptr inbounds %i8.4, %i8.4* %data, i64 %i, i32 2
716  %tmp3 = getelementptr inbounds %i8.4, %i8.4* %data, i64 %i, i32 3
  ; The loaded values are not used again in this function.
717  %tmp4 = load i8, i8* %tmp0, align 1
718  %tmp5 = load i8, i8* %tmp1, align 1
719  %tmp6 = load i8, i8* %tmp2, align 1
720  %tmp7 = load i8, i8* %tmp3, align 1
721  store i8 0, i8* %tmp0, align 1
722  store i8 0, i8* %tmp1, align 1
723  store i8 0, i8* %tmp2, align 1
724  store i8 0, i8* %tmp3, align 1
  ; The exit test sits at the bottom, so the body runs at least once; the loop
  ; repeats while the next index is below %n (signed compare).
725  %i.next = add nuw nsw i64 %i, 1
726  %cond = icmp slt i64 %i.next, %n
727  br i1 %cond, label %for.body, label %for.end
728
729for.end:
730  ret void
731}
732
733%i16.4 = type {i16, i16, i16, i16}
; i16_factor_4: treats %data as an array of %i16.4 and, for each index %i,
; loads the four i16 fields of that element and then stores 0 to each of them.
; The check lines below pin the per-instruction cost printed by the loop
; vectorizer's debug output at the vector widths 2, 4, 8 and 16 forced by the
; RUN lines; at every width the expected cost is non-zero only on the first
; load and on the last store, and 0 on the other six memory instructions.
734define void @i16_factor_4(%i16.4* %data, i64 %n) #0 {
735entry:
736  br label %for.body
737
738; VF_2-LABEL:  Checking a loop in "i16_factor_4"
739; VF_2:          Found an estimated cost of 40 for VF 2 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
740; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i16, i16* %tmp1, align 2
741; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i16, i16* %tmp2, align 2
742; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i16, i16* %tmp3, align 2
743; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp0, align 2
744; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp1, align 2
745; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp2, align 2
746; VF_2-NEXT:     Found an estimated cost of 24 for VF 2 For instruction: store i16 0, i16* %tmp3, align 2
747; VF_4-LABEL:  Checking a loop in "i16_factor_4"
748; VF_4:          Found an estimated cost of 144 for VF 4 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
749; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, i16* %tmp1, align 2
750; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i16, i16* %tmp2, align 2
751; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i16, i16* %tmp3, align 2
752; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2
753; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2
754; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp2, align 2
755; VF_4-NEXT:     Found an estimated cost of 80 for VF 4 For instruction: store i16 0, i16* %tmp3, align 2
756; VF_8-LABEL:  Checking a loop in "i16_factor_4"
757; VF_8:          Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
758; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, i16* %tmp1, align 2
759; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i16, i16* %tmp2, align 2
760; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i16, i16* %tmp3, align 2
761; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2
762; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2
763; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp2, align 2
764; VF_8-NEXT:     Found an estimated cost of 288 for VF 8 For instruction: store i16 0, i16* %tmp3, align 2
765; VF_16-LABEL: Checking a loop in "i16_factor_4"
766; VF_16:         Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load i16, i16* %tmp0, align 2
767; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, i16* %tmp1, align 2
768; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i16, i16* %tmp2, align 2
769; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i16, i16* %tmp3, align 2
770; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2
771; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2
772; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp2, align 2
773; VF_16-NEXT:    Found an estimated cost of 1088 for VF 16 For instruction: store i16 0, i16* %tmp3, align 2
774for.body:
  ; %i is the element index; it is 0 when the loop is entered from %entry.
775  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of fields 0 through 3 of element %i.
776  %tmp0 = getelementptr inbounds %i16.4, %i16.4* %data, i64 %i, i32 0
777  %tmp1 = getelementptr inbounds %i16.4, %i16.4* %data, i64 %i, i32 1
778  %tmp2 = getelementptr inbounds %i16.4, %i16.4* %data, i64 %i, i32 2
779  %tmp3 = getelementptr inbounds %i16.4, %i16.4* %data, i64 %i, i32 3
  ; The loaded values are not used again in this function.
780  %tmp4 = load i16, i16* %tmp0, align 2
781  %tmp5 = load i16, i16* %tmp1, align 2
782  %tmp6 = load i16, i16* %tmp2, align 2
783  %tmp7 = load i16, i16* %tmp3, align 2
784  store i16 0, i16* %tmp0, align 2
785  store i16 0, i16* %tmp1, align 2
786  store i16 0, i16* %tmp2, align 2
787  store i16 0, i16* %tmp3, align 2
  ; The exit test sits at the bottom, so the body runs at least once; the loop
  ; repeats while the next index is below %n (signed compare).
788  %i.next = add nuw nsw i64 %i, 1
789  %cond = icmp slt i64 %i.next, %n
790  br i1 %cond, label %for.body, label %for.end
791
792for.end:
793  ret void
794}
795
796%i32.4 = type {i32, i32, i32, i32}
; i32_factor_4: treats %data as an array of %i32.4 and, for each index %i,
; loads the four i32 fields of that element and then stores 0 to each of them.
; The check lines below pin the per-instruction cost printed by the loop
; vectorizer's debug output at the vector widths 2, 4, 8 and 16 forced by the
; RUN lines; at every width the expected cost is non-zero only on the first
; load and on the last store, and 0 on the other six memory instructions.
797define void @i32_factor_4(%i32.4* %data, i64 %n) #0 {
798entry:
799  br label %for.body
800
801; VF_2-LABEL:  Checking a loop in "i32_factor_4"
802; VF_2:          Found an estimated cost of 40 for VF 2 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
803; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
804; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
805; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
806; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4
807; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4
808; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp2, align 4
809; VF_2-NEXT:     Found an estimated cost of 24 for VF 2 For instruction: store i32 0, i32* %tmp3, align 4
810; VF_4-LABEL:  Checking a loop in "i32_factor_4"
811; VF_4:          Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
812; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
813; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
814; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
815; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4
816; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4
817; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp2, align 4
818; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store i32 0, i32* %tmp3, align 4
819; VF_8-LABEL:  Checking a loop in "i32_factor_4"
820; VF_8:          Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
821; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
822; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
823; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
824; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4
825; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4
826; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp2, align 4
827; VF_8-NEXT:     Found an estimated cost of 288 for VF 8 For instruction: store i32 0, i32* %tmp3, align 4
828; VF_16-LABEL: Checking a loop in "i32_factor_4"
829; VF_16:         Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load i32, i32* %tmp0, align 4
830; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, i32* %tmp1, align 4
831; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i32, i32* %tmp2, align 4
832; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i32, i32* %tmp3, align 4
833; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4
834; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4
835; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp2, align 4
836; VF_16-NEXT:    Found an estimated cost of 1088 for VF 16 For instruction: store i32 0, i32* %tmp3, align 4
837for.body:
  ; %i is the element index; it is 0 when the loop is entered from %entry.
838  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of fields 0 through 3 of element %i.
839  %tmp0 = getelementptr inbounds %i32.4, %i32.4* %data, i64 %i, i32 0
840  %tmp1 = getelementptr inbounds %i32.4, %i32.4* %data, i64 %i, i32 1
841  %tmp2 = getelementptr inbounds %i32.4, %i32.4* %data, i64 %i, i32 2
842  %tmp3 = getelementptr inbounds %i32.4, %i32.4* %data, i64 %i, i32 3
  ; The loaded values are not used again in this function.
843  %tmp4 = load i32, i32* %tmp0, align 4
844  %tmp5 = load i32, i32* %tmp1, align 4
845  %tmp6 = load i32, i32* %tmp2, align 4
846  %tmp7 = load i32, i32* %tmp3, align 4
847  store i32 0, i32* %tmp0, align 4
848  store i32 0, i32* %tmp1, align 4
849  store i32 0, i32* %tmp2, align 4
850  store i32 0, i32* %tmp3, align 4
  ; The exit test sits at the bottom, so the body runs at least once; the loop
  ; repeats while the next index is below %n (signed compare).
851  %i.next = add nuw nsw i64 %i, 1
852  %cond = icmp slt i64 %i.next, %n
853  br i1 %cond, label %for.body, label %for.end
854
855for.end:
856  ret void
857}
858
859%i64.4 = type {i64, i64, i64, i64}
; i64_factor_4: treats %data as an array of %i64.4 and, for each index %i,
; loads the four i64 fields of that element and then stores 0 to each of them.
; The check lines below pin the per-instruction cost printed by the loop
; vectorizer's debug output at the vector widths 2, 4, 8 and 16 forced by the
; RUN lines; at every width the expected cost is non-zero only on the first
; load and on the last store, and 0 on the other six memory instructions.
860define void @i64_factor_4(%i64.4* %data, i64 %n) #0 {
861entry:
862  br label %for.body
863
864; VF_2-LABEL:  Checking a loop in "i64_factor_4"
865; VF_2:          Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i64, i64* %tmp0, align 8
866; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i64, i64* %tmp1, align 8
867; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i64, i64* %tmp2, align 8
868; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i64, i64* %tmp3, align 8
869; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8
870; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8
871; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp2, align 8
872; VF_2-NEXT:     Found an estimated cost of 32 for VF 2 For instruction: store i64 0, i64* %tmp3, align 8
873; VF_4-LABEL:  Checking a loop in "i64_factor_4"
874; VF_4:          Found an estimated cost of 160 for VF 4 For instruction: %tmp4 = load i64, i64* %tmp0, align 8
875; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i64, i64* %tmp1, align 8
876; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i64, i64* %tmp2, align 8
877; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i64, i64* %tmp3, align 8
878; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp0, align 8
879; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8
880; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp2, align 8
881; VF_4-NEXT:     Found an estimated cost of 96 for VF 4 For instruction: store i64 0, i64* %tmp3, align 8
882; VF_8-LABEL:  Checking a loop in "i64_factor_4"
883; VF_8:          Found an estimated cost of 576 for VF 8 For instruction: %tmp4 = load i64, i64* %tmp0, align 8
884; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i64, i64* %tmp1, align 8
885; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i64, i64* %tmp2, align 8
886; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i64, i64* %tmp3, align 8
887; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8
888; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8
889; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp2, align 8
890; VF_8-NEXT:     Found an estimated cost of 320 for VF 8 For instruction: store i64 0, i64* %tmp3, align 8
891; VF_16-LABEL: Checking a loop in "i64_factor_4"
892; VF_16:         Found an estimated cost of 2176 for VF 16 For instruction: %tmp4 = load i64, i64* %tmp0, align 8
893; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i64, i64* %tmp1, align 8
894; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i64, i64* %tmp2, align 8
895; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i64, i64* %tmp3, align 8
896; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp0, align 8
897; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp1, align 8
898; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp2, align 8
899; VF_16-NEXT:    Found an estimated cost of 1152 for VF 16 For instruction: store i64 0, i64* %tmp3, align 8
900for.body:
  ; %i is the element index; it is 0 when the loop is entered from %entry.
901  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of fields 0 through 3 of element %i.
902  %tmp0 = getelementptr inbounds %i64.4, %i64.4* %data, i64 %i, i32 0
903  %tmp1 = getelementptr inbounds %i64.4, %i64.4* %data, i64 %i, i32 1
904  %tmp2 = getelementptr inbounds %i64.4, %i64.4* %data, i64 %i, i32 2
905  %tmp3 = getelementptr inbounds %i64.4, %i64.4* %data, i64 %i, i32 3
  ; The loaded values are not used again in this function.
906  %tmp4 = load i64, i64* %tmp0, align 8
907  %tmp5 = load i64, i64* %tmp1, align 8
908  %tmp6 = load i64, i64* %tmp2, align 8
909  %tmp7 = load i64, i64* %tmp3, align 8
910  store i64 0, i64* %tmp0, align 8
911  store i64 0, i64* %tmp1, align 8
912  store i64 0, i64* %tmp2, align 8
913  store i64 0, i64* %tmp3, align 8
  ; The exit test sits at the bottom, so the body runs at least once; the loop
  ; repeats while the next index is below %n (signed compare).
914  %i.next = add nuw nsw i64 %i, 1
915  %cond = icmp slt i64 %i.next, %n
916  br i1 %cond, label %for.body, label %for.end
917
918for.end:
919  ret void
920}
921
922%f16.4 = type {half, half, half, half}
; Cost-model test for a struct of four half fields accessed with stride 4.
; Each loop iteration loads all four fields of element %i (the loaded values
; are unused) and then stores zero to each field.
; The check lines below pin the "Found an estimated cost of ..." debug output
; of the loop vectorizer for the forced vector widths 2, 4, 8 and 16 selected
; by the RUN lines at the top of the file (one FileCheck prefix per width).
; In the expected output only the first load and the last store carry a
; non-zero cost; the remaining loads and stores are reported with cost 0.
923define void @f16_factor_4(%f16.4* %data, i64 %n) #0 {
924entry:
925  br label %for.body
926
; Expected per-instruction cost estimates, grouped by forced vector width.
927; VF_2-LABEL:  Checking a loop in "f16_factor_4"
928; VF_2:          Found an estimated cost of 40 for VF 2 For instruction: %tmp4 = load half, half* %tmp0, align 2
929; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load half, half* %tmp1, align 2
930; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load half, half* %tmp2, align 2
931; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load half, half* %tmp3, align 2
932; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp0, align 2
933; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp1, align 2
934; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp2, align 2
935; VF_2-NEXT:     Found an estimated cost of 24 for VF 2 For instruction: store half 0xH0000, half* %tmp3, align 2
936; VF_4-LABEL:  Checking a loop in "f16_factor_4"
937; VF_4:          Found an estimated cost of 144 for VF 4 For instruction: %tmp4 = load half, half* %tmp0, align 2
938; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load half, half* %tmp1, align 2
939; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load half, half* %tmp2, align 2
940; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load half, half* %tmp3, align 2
941; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2
942; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2
943; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp2, align 2
944; VF_4-NEXT:     Found an estimated cost of 80 for VF 4 For instruction: store half 0xH0000, half* %tmp3, align 2
945; VF_8-LABEL:  Checking a loop in "f16_factor_4"
946; VF_8:          Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load half, half* %tmp0, align 2
947; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, half* %tmp1, align 2
948; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load half, half* %tmp2, align 2
949; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load half, half* %tmp3, align 2
950; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2
951; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2
952; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp2, align 2
953; VF_8-NEXT:     Found an estimated cost of 288 for VF 8 For instruction: store half 0xH0000, half* %tmp3, align 2
954; VF_16-LABEL: Checking a loop in "f16_factor_4"
955; VF_16:         Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load half, half* %tmp0, align 2
956; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, half* %tmp1, align 2
957; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load half, half* %tmp2, align 2
958; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load half, half* %tmp3, align 2
959; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp0, align 2
960; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp1, align 2
961; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp2, align 2
962; VF_16-NEXT:    Found an estimated cost of 1088 for VF 16 For instruction: store half 0xH0000, half* %tmp3, align 2
963for.body:
964  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of fields 0..3 of element %i of %data.
965  %tmp0 = getelementptr inbounds %f16.4, %f16.4* %data, i64 %i, i32 0
966  %tmp1 = getelementptr inbounds %f16.4, %f16.4* %data, i64 %i, i32 1
967  %tmp2 = getelementptr inbounds %f16.4, %f16.4* %data, i64 %i, i32 2
968  %tmp3 = getelementptr inbounds %f16.4, %f16.4* %data, i64 %i, i32 3
  ; Load every field; the results are intentionally left unused.
969  %tmp4 = load half, half* %tmp0, align 2
970  %tmp5 = load half, half* %tmp1, align 2
971  %tmp6 = load half, half* %tmp2, align 2
972  %tmp7 = load half, half* %tmp3, align 2
  ; Overwrite every field with zero.
973  store half 0.0, half* %tmp0, align 2
974  store half 0.0, half* %tmp1, align 2
975  store half 0.0, half* %tmp2, align 2
976  store half 0.0, half* %tmp3, align 2
  ; Advance the index and repeat while %i.next < %n (signed compare).
977  %i.next = add nuw nsw i64 %i, 1
978  %cond = icmp slt i64 %i.next, %n
979  br i1 %cond, label %for.body, label %for.end
980
981for.end:
982  ret void
983}
984
985%f32.4 = type {float, float, float, float}
; Cost-model test for a struct of four float fields accessed with stride 4.
; Each loop iteration loads all four fields of element %i (the loaded values
; are unused) and then stores zero to each field.
; The check lines below pin the "Found an estimated cost of ..." debug output
; of the loop vectorizer for the forced vector widths 2, 4, 8 and 16 selected
; by the RUN lines at the top of the file (one FileCheck prefix per width).
; In the expected output only the first load and the last store carry a
; non-zero cost; the remaining loads and stores are reported with cost 0.
986define void @f32_factor_4(%f32.4* %data, i64 %n) #0 {
987entry:
988  br label %for.body
989
; Expected per-instruction cost estimates, grouped by forced vector width.
990; VF_2-LABEL:  Checking a loop in "f32_factor_4"
991; VF_2:          Found an estimated cost of 40 for VF 2 For instruction: %tmp4 = load float, float* %tmp0, align 4
992; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load float, float* %tmp1, align 4
993; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load float, float* %tmp2, align 4
994; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load float, float* %tmp3, align 4
995; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp0, align 4
996; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4
997; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp2, align 4
998; VF_2-NEXT:     Found an estimated cost of 24 for VF 2 For instruction: store float 0.000000e+00, float* %tmp3, align 4
999; VF_4-LABEL:  Checking a loop in "f32_factor_4"
1000; VF_4:          Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load float, float* %tmp0, align 4
1001; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, float* %tmp1, align 4
1002; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load float, float* %tmp2, align 4
1003; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load float, float* %tmp3, align 4
1004; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp0, align 4
1005; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp1, align 4
1006; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp2, align 4
1007; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store float 0.000000e+00, float* %tmp3, align 4
1008; VF_8-LABEL:  Checking a loop in "f32_factor_4"
1009; VF_8:          Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load float, float* %tmp0, align 4
1010; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, float* %tmp1, align 4
1011; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load float, float* %tmp2, align 4
1012; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load float, float* %tmp3, align 4
1013; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp0, align 4
1014; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp1, align 4
1015; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp2, align 4
1016; VF_8-NEXT:     Found an estimated cost of 288 for VF 8 For instruction: store float 0.000000e+00, float* %tmp3, align 4
1017; VF_16-LABEL: Checking a loop in "f32_factor_4"
1018; VF_16:         Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load float, float* %tmp0, align 4
1019; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, float* %tmp1, align 4
1020; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load float, float* %tmp2, align 4
1021; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load float, float* %tmp3, align 4
1022; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp0, align 4
1023; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp1, align 4
1024; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp2, align 4
1025; VF_16-NEXT:    Found an estimated cost of 1088 for VF 16 For instruction: store float 0.000000e+00, float* %tmp3, align 4
1026for.body:
1027  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of fields 0..3 of element %i of %data.
1028  %tmp0 = getelementptr inbounds %f32.4, %f32.4* %data, i64 %i, i32 0
1029  %tmp1 = getelementptr inbounds %f32.4, %f32.4* %data, i64 %i, i32 1
1030  %tmp2 = getelementptr inbounds %f32.4, %f32.4* %data, i64 %i, i32 2
1031  %tmp3 = getelementptr inbounds %f32.4, %f32.4* %data, i64 %i, i32 3
  ; Load every field; the results are intentionally left unused.
1032  %tmp4 = load float, float* %tmp0, align 4
1033  %tmp5 = load float, float* %tmp1, align 4
1034  %tmp6 = load float, float* %tmp2, align 4
1035  %tmp7 = load float, float* %tmp3, align 4
  ; Overwrite every field with zero.
1036  store float 0.0, float* %tmp0, align 4
1037  store float 0.0, float* %tmp1, align 4
1038  store float 0.0, float* %tmp2, align 4
1039  store float 0.0, float* %tmp3, align 4
  ; Advance the index and repeat while %i.next < %n (signed compare).
1040  %i.next = add nuw nsw i64 %i, 1
1041  %cond = icmp slt i64 %i.next, %n
1042  br i1 %cond, label %for.body, label %for.end
1043
1044for.end:
1045  ret void
1046}
1047
1048%f64.4 = type {double, double, double, double}
; Cost-model test for a struct of four double fields accessed with stride 4.
; Each loop iteration loads all four fields of element %i (the loaded values
; are unused) and then stores zero to each field.
; The check lines below pin the "Found an estimated cost of ..." debug output
; of the loop vectorizer for the forced vector widths 2, 4, 8 and 16 selected
; by the RUN lines at the top of the file (one FileCheck prefix per width).
; In the expected output only the first load and the last store carry a
; non-zero cost; the remaining loads and stores are reported with cost 0.
1049define void @f64_factor_4(%f64.4* %data, i64 %n) #0 {
1050entry:
1051  br label %for.body
1052
; Expected per-instruction cost estimates, grouped by forced vector width.
1053; VF_2-LABEL:  Checking a loop in "f64_factor_4"
1054; VF_2:          Found an estimated cost of 40 for VF 2 For instruction: %tmp4 = load double, double* %tmp0, align 8
1055; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load double, double* %tmp1, align 8
1056; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load double, double* %tmp2, align 8
1057; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load double, double* %tmp3, align 8
1058; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp0, align 8
1059; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp1, align 8
1060; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp2, align 8
1061; VF_2-NEXT:     Found an estimated cost of 24 for VF 2 For instruction: store double 0.000000e+00, double* %tmp3, align 8
1062; VF_4-LABEL:  Checking a loop in "f64_factor_4"
1063; VF_4:          Found an estimated cost of 144 for VF 4 For instruction: %tmp4 = load double, double* %tmp0, align 8
1064; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load double, double* %tmp1, align 8
1065; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load double, double* %tmp2, align 8
1066; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load double, double* %tmp3, align 8
1067; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp0, align 8
1068; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp1, align 8
1069; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp2, align 8
1070; VF_4-NEXT:     Found an estimated cost of 80 for VF 4 For instruction: store double 0.000000e+00, double* %tmp3, align 8
1071; VF_8-LABEL:  Checking a loop in "f64_factor_4"
1072; VF_8:          Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load double, double* %tmp0, align 8
1073; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load double, double* %tmp1, align 8
1074; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load double, double* %tmp2, align 8
1075; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load double, double* %tmp3, align 8
1076; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp0, align 8
1077; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp1, align 8
1078; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp2, align 8
1079; VF_8-NEXT:     Found an estimated cost of 288 for VF 8 For instruction: store double 0.000000e+00, double* %tmp3, align 8
1080; VF_16-LABEL: Checking a loop in "f64_factor_4"
1081; VF_16:         Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load double, double* %tmp0, align 8
1082; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load double, double* %tmp1, align 8
1083; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load double, double* %tmp2, align 8
1084; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load double, double* %tmp3, align 8
1085; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp0, align 8
1086; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp1, align 8
1087; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp2, align 8
1088; VF_16-NEXT:    Found an estimated cost of 1088 for VF 16 For instruction: store double 0.000000e+00, double* %tmp3, align 8
1089for.body:
1090  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Addresses of fields 0..3 of element %i of %data.
1091  %tmp0 = getelementptr inbounds %f64.4, %f64.4* %data, i64 %i, i32 0
1092  %tmp1 = getelementptr inbounds %f64.4, %f64.4* %data, i64 %i, i32 1
1093  %tmp2 = getelementptr inbounds %f64.4, %f64.4* %data, i64 %i, i32 2
1094  %tmp3 = getelementptr inbounds %f64.4, %f64.4* %data, i64 %i, i32 3
  ; Load every field; the results are intentionally left unused.
1095  %tmp4 = load double, double* %tmp0, align 8
1096  %tmp5 = load double, double* %tmp1, align 8
1097  %tmp6 = load double, double* %tmp2, align 8
1098  %tmp7 = load double, double* %tmp3, align 8
  ; Overwrite every field with zero.
1099  store double 0.0, double* %tmp0, align 8
1100  store double 0.0, double* %tmp1, align 8
1101  store double 0.0, double* %tmp2, align 8
1102  store double 0.0, double* %tmp3, align 8
  ; Advance the index and repeat while %i.next < %n (signed compare).
1103  %i.next = add nuw nsw i64 %i, 1
1104  %cond = icmp slt i64 %i.next, %n
1105  br i1 %cond, label %for.body, label %for.end
1106
1107for.end:
1108  ret void
1109}
1110
; Attribute group #0, referenced by the function definitions in this file.
; It sets the "target-features" string attribute to "+mve.fp".
; NOTE(review): presumably this enables MVE with floating-point support so the
; cost model sees the vector extension - confirm against the ARM feature list.
1111attributes #0 = { "target-features"="+mve.fp" }
1112